Commit 871183ea, authored by Deepti Gandluri, committed by Commit Bot

[wasm-simd] Implement v128.const on x64, Arm64

 - Add wasm opcode, decode and compiler code for v128.const
 - Add codegen implementations for v128.const on x64/Arm64
 - Reuse/Rename some shuffle specific methods to handle generic
 128-bit immediates
 - Tests

Bug: v8:8460
Change-Id: Idc365c8f6402c13259400eac92e0b75bd0f991a1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2276176
Commit-Queue: Deepti Gandluri (OOO Till November) <gdeepti@chromium.org>
Reviewed-by: Sigurd Schneider <sigurds@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68643}
parent 59028463
...@@ -2445,8 +2445,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2445,8 +2445,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mov(dst.W(), tmp.V8H(), 0); __ Mov(dst.W(), tmp.V8H(), 0);
break; break;
} }
case kArm64S128Const: {
uint64_t imm1 =
i.InputInt32(0) | (static_cast<uint64_t>(i.InputInt32(1)) << 32);
uint64_t imm2 =
i.InputInt32(2) | (static_cast<uint64_t>(i.InputInt32(3)) << 32);
__ Movi(i.OutputSimd128Register().V16B(), imm2, imm1);
break;
}
case kArm64S128Zero: { case kArm64S128Zero: {
__ Movi(i.OutputSimd128Register().V16B(), 0); VRegister dst = i.OutputSimd128Register().V16B();
__ Eor(dst, dst, dst);
break; break;
} }
SIMD_BINOP_CASE(kArm64S128And, And, 16B); SIMD_BINOP_CASE(kArm64S128And, And, 16B);
......
...@@ -338,6 +338,7 @@ namespace compiler { ...@@ -338,6 +338,7 @@ namespace compiler {
V(Arm64I8x16RoundingAverageU) \ V(Arm64I8x16RoundingAverageU) \
V(Arm64I8x16Abs) \ V(Arm64I8x16Abs) \
V(Arm64I8x16BitMask) \ V(Arm64I8x16BitMask) \
V(Arm64S128Const) \
V(Arm64S128Zero) \ V(Arm64S128Zero) \
V(Arm64S128Dup) \ V(Arm64S128Dup) \
V(Arm64S128And) \ V(Arm64S128And) \
......
...@@ -308,6 +308,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -308,6 +308,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I8x16RoundingAverageU: case kArm64I8x16RoundingAverageU:
case kArm64I8x16Abs: case kArm64I8x16Abs:
case kArm64I8x16BitMask: case kArm64I8x16BitMask:
case kArm64S128Const:
case kArm64S128Zero: case kArm64S128Zero:
case kArm64S128Dup: case kArm64S128Dup:
case kArm64S128And: case kArm64S128And:
......
...@@ -3310,6 +3310,24 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -3310,6 +3310,24 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(S128Xor, kArm64S128Xor) \ V(S128Xor, kArm64S128Xor) \
V(S128AndNot, kArm64S128AndNot) V(S128AndNot, kArm64S128AndNot)
// Selects instructions for an S128Const node on arm64. All-zero constants
// are strength-reduced to kArm64S128Zero; everything else is emitted as a
// generic kArm64S128Const with four 32-bit immediates.
void InstructionSelector::VisitS128Const(Node* node) {
  Arm64OperandGenerator g(this);
  static const int kUint32Immediates = 4;
  uint32_t val[kUint32Immediates];
  STATIC_ASSERT(sizeof(val) == kSimd128Size);
  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
  // If all bytes are zeros, avoid emitting code for generic constants.
  // BUG FIX: the previous check !(val[0] && val[1] && val[2] && val[3]) was
  // true whenever ANY 32-bit chunk was zero, so constants such as
  // {0, 1, 1, 1} were wrongly lowered to kArm64S128Zero. The correct
  // "all zero" test requires every chunk to be zero.
  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
  InstructionOperand dst = g.DefineAsRegister(node);
  if (all_zeros) {
    Emit(kArm64S128Zero, dst);
  } else {
    // Reuse the output operand computed above rather than defining it twice.
    Emit(kArm64S128Const, dst, g.UseImmediate(val[0]), g.UseImmediate(val[1]),
         g.UseImmediate(val[2]), g.UseImmediate(val[3]));
  }
}
void InstructionSelector::VisitS128Zero(Node* node) { void InstructionSelector::VisitS128Zero(Node* node) {
Arm64OperandGenerator g(this); Arm64OperandGenerator g(this);
Emit(kArm64S128Zero, g.DefineAsRegister(node)); Emit(kArm64S128Zero, g.DefineAsRegister(node));
......
...@@ -2189,6 +2189,8 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -2189,6 +2189,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI8x16Abs(node); return MarkAsSimd128(node), VisitI8x16Abs(node);
case IrOpcode::kI8x16BitMask: case IrOpcode::kI8x16BitMask:
return MarkAsWord32(node), VisitI8x16BitMask(node); return MarkAsWord32(node), VisitI8x16BitMask(node);
case IrOpcode::kS128Const:
return MarkAsSimd128(node), VisitS128Const(node);
case IrOpcode::kS128Zero: case IrOpcode::kS128Zero:
return MarkAsSimd128(node), VisitS128Zero(node); return MarkAsSimd128(node), VisitS128Zero(node);
case IrOpcode::kS128And: case IrOpcode::kS128And:
...@@ -2710,6 +2712,10 @@ void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); } ...@@ -2710,6 +2712,10 @@ void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// && !V8_TARGET_ARCH_ARM // && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitS128Const(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) { void InstructionSelector::VisitParameter(Node* node) {
...@@ -3198,7 +3204,7 @@ void InstructionSelector::CanonicalizeShuffle(bool inputs_equal, ...@@ -3198,7 +3204,7 @@ void InstructionSelector::CanonicalizeShuffle(bool inputs_equal,
void InstructionSelector::CanonicalizeShuffle(Node* node, uint8_t* shuffle, void InstructionSelector::CanonicalizeShuffle(Node* node, uint8_t* shuffle,
bool* is_swizzle) { bool* is_swizzle) {
// Get raw shuffle indices. // Get raw shuffle indices.
memcpy(shuffle, S8x16ShuffleParameterOf(node->op()).data(), kSimd128Size); memcpy(shuffle, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
bool needs_swap; bool needs_swap;
bool inputs_equal = GetVirtualRegister(node->InputAt(0)) == bool inputs_equal = GetVirtualRegister(node->InputAt(0)) ==
GetVirtualRegister(node->InputAt(1)); GetVirtualRegister(node->InputAt(1));
......
...@@ -757,13 +757,13 @@ void AdjustStackPointerForTailCall(TurboAssembler* assembler, ...@@ -757,13 +757,13 @@ void AdjustStackPointerForTailCall(TurboAssembler* assembler,
} }
} }
void SetupShuffleMaskInTempRegister(TurboAssembler* assembler, uint32_t* mask, void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
XMMRegister tmp) { XMMRegister reg) {
uint64_t shuffle_mask = (mask[0]) | (uint64_t{mask[1]} << 32); uint64_t value = (imms[0]) | (uint64_t{imms[1]} << 32);
assembler->Move(tmp, shuffle_mask); assembler->Move(reg, value);
shuffle_mask = (mask[2]) | (uint64_t{mask[3]} << 32); value = (imms[2]) | (uint64_t{imms[3]} << 32);
assembler->movq(kScratchRegister, shuffle_mask); assembler->movq(kScratchRegister, value);
assembler->Pinsrq(tmp, kScratchRegister, int8_t{1}); assembler->Pinsrq(reg, kScratchRegister, int8_t{1});
} }
} // namespace } // namespace
...@@ -3191,11 +3191,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3191,11 +3191,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64S128Const: {
// Emit code for generic constants as all zeros, or ones cases will be
// handled separately by the selector.
XMMRegister dst = i.OutputSimd128Register();
uint32_t imm[4] = {};
for (int j = 0; j < 4; j++) {
imm[j] = i.InputUint32(j);
}
SetupSimdImmediateInRegister(tasm(), imm, dst);
break;
}
case kX64S128Zero: { case kX64S128Zero: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
__ Xorps(dst, dst); __ Xorps(dst, dst);
break; break;
} }
case kX64S128AllOnes: {
XMMRegister dst = i.OutputSimd128Register();
__ Pcmpeqd(dst, dst);
break;
}
case kX64I16x8Splat: { case kX64I16x8Splat: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) { if (HasRegisterInput(instr, 0)) {
...@@ -3728,7 +3744,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3728,7 +3744,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
mask[j - 1] = i.InputUint32(j); mask[j - 1] = i.InputUint32(j);
} }
SetupShuffleMaskInTempRegister(tasm(), mask, tmp_simd); SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
__ Pshufb(dst, tmp_simd); __ Pshufb(dst, tmp_simd);
} else { // two input operands } else { // two input operands
DCHECK_EQ(6, instr->InputCount()); DCHECK_EQ(6, instr->InputCount());
...@@ -3741,7 +3757,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3741,7 +3757,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k; mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
} }
} }
SetupShuffleMaskInTempRegister(tasm(), mask1, tmp_simd); SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
__ Pshufb(kScratchDoubleReg, tmp_simd); __ Pshufb(kScratchDoubleReg, tmp_simd);
uint32_t mask2[4] = {}; uint32_t mask2[4] = {};
if (instr->InputAt(1)->IsSimd128Register()) { if (instr->InputAt(1)->IsSimd128Register()) {
...@@ -3757,7 +3773,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3757,7 +3773,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k; mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
} }
} }
SetupShuffleMaskInTempRegister(tasm(), mask2, tmp_simd); SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
__ Pshufb(dst, tmp_simd); __ Pshufb(dst, tmp_simd);
__ Por(dst, kScratchDoubleReg); __ Por(dst, kScratchDoubleReg);
} }
......
...@@ -316,7 +316,9 @@ namespace compiler { ...@@ -316,7 +316,9 @@ namespace compiler {
V(X64I8x16RoundingAverageU) \ V(X64I8x16RoundingAverageU) \
V(X64I8x16Abs) \ V(X64I8x16Abs) \
V(X64I8x16BitMask) \ V(X64I8x16BitMask) \
V(X64S128Const) \
V(X64S128Zero) \ V(X64S128Zero) \
V(X64S128AllOnes) \
V(X64S128Not) \ V(X64S128Not) \
V(X64S128And) \ V(X64S128And) \
V(X64S128Or) \ V(X64S128Or) \
......
...@@ -293,7 +293,9 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -293,7 +293,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64S128Xor: case kX64S128Xor:
case kX64S128Not: case kX64S128Not:
case kX64S128Select: case kX64S128Select:
case kX64S128Const:
case kX64S128Zero: case kX64S128Zero:
case kX64S128AllOnes:
case kX64S128AndNot: case kX64S128AndNot:
case kX64V64x2AnyTrue: case kX64V64x2AnyTrue:
case kX64V64x2AllTrue: case kX64V64x2AllTrue:
......
...@@ -2809,6 +2809,31 @@ VISIT_ATOMIC_BINOP(Xor) ...@@ -2809,6 +2809,31 @@ VISIT_ATOMIC_BINOP(Xor)
V(V16x8AllTrue) \ V(V16x8AllTrue) \
V(V8x16AllTrue) V(V8x16AllTrue)
// Selects instructions for an S128Const node on x64. All-zero constants use
// kX64S128Zero (xorps dst,dst) and all-one constants use kX64S128AllOnes
// (pcmpeqd dst,dst); generic constants are emitted with four 32-bit
// immediates plus a temp SIMD register for materialization.
void InstructionSelector::VisitS128Const(Node* node) {
  X64OperandGenerator g(this);
  static const int kUint32Immediates = 4;
  uint32_t val[kUint32Immediates];
  STATIC_ASSERT(sizeof(val) == kSimd128Size);
  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
  // If all bytes are zeros or ones, avoid emitting code for generic constants.
  // BUG FIX: the previous check !(val[0] && val[1] && val[2] && val[3]) was
  // true whenever ANY 32-bit chunk was zero, so constants such as
  // {0, 1, 1, 1} were wrongly lowered to kX64S128Zero. The correct
  // "all zero" test requires every chunk to be zero.
  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
  bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
                  val[2] == UINT32_MAX && val[3] == UINT32_MAX;
  InstructionOperand dst = g.DefineAsRegister(node);
  if (all_zeros) {
    Emit(kX64S128Zero, dst);
  } else if (all_ones) {
    Emit(kX64S128AllOnes, dst);
  } else {
    InstructionOperand inputs[kUint32Immediates];
    for (int i = 0; i < kUint32Immediates; ++i) {
      inputs[i] = g.UseImmediate(val[i]);
    }
    InstructionOperand temp(g.TempSimd128Register());
    Emit(kX64S128Const, 1, &dst, kUint32Immediates, inputs, 1, &temp);
  }
}
void InstructionSelector::VisitS128Zero(Node* node) { void InstructionSelector::VisitS128Zero(Node* node) {
X64OperandGenerator g(this); X64OperandGenerator g(this);
Emit(kX64S128Zero, g.DefineAsRegister(node)); Emit(kX64S128Zero, g.DefineAsRegister(node));
......
...@@ -1499,21 +1499,21 @@ const Operator* MachineOperatorBuilder::I64x2ReplaceLaneI32Pair( ...@@ -1499,21 +1499,21 @@ const Operator* MachineOperatorBuilder::I64x2ReplaceLaneI32Pair(
"Replace lane", 3, 0, 0, 1, 0, 0, lane_index); "Replace lane", 3, 0, 0, 1, 0, 0, lane_index);
} }
bool operator==(S8x16ShuffleParameter const& lhs, bool operator==(S128ImmediateParameter const& lhs,
S8x16ShuffleParameter const& rhs) { S128ImmediateParameter const& rhs) {
return (lhs.shuffle() == rhs.shuffle()); return (lhs.immediate() == rhs.immediate());
} }
bool operator!=(S8x16ShuffleParameter const& lhs, bool operator!=(S128ImmediateParameter const& lhs,
S8x16ShuffleParameter const& rhs) { S128ImmediateParameter const& rhs) {
return !(lhs == rhs); return !(lhs == rhs);
} }
size_t hash_value(S8x16ShuffleParameter const& p) { size_t hash_value(S128ImmediateParameter const& p) {
return base::hash_range(p.shuffle().begin(), p.shuffle().end()); return base::hash_range(p.immediate().begin(), p.immediate().end());
} }
std::ostream& operator<<(std::ostream& os, S8x16ShuffleParameter const& p) { std::ostream& operator<<(std::ostream& os, S128ImmediateParameter const& p) {
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
const char* separator = (i < 15) ? "," : ""; const char* separator = (i < 15) ? "," : "";
os << static_cast<uint32_t>(p[i]) << separator; os << static_cast<uint32_t>(p[i]) << separator;
...@@ -1521,16 +1521,23 @@ std::ostream& operator<<(std::ostream& os, S8x16ShuffleParameter const& p) { ...@@ -1521,16 +1521,23 @@ std::ostream& operator<<(std::ostream& os, S8x16ShuffleParameter const& p) {
return os; return os;
} }
S8x16ShuffleParameter const& S8x16ShuffleParameterOf(Operator const* op) { S128ImmediateParameter const& S128ImmediateParameterOf(Operator const* op) {
DCHECK_EQ(IrOpcode::kS8x16Shuffle, op->opcode()); DCHECK(IrOpcode::kS8x16Shuffle == op->opcode() ||
return OpParameter<S8x16ShuffleParameter>(op); IrOpcode::kS128Const == op->opcode());
return OpParameter<S128ImmediateParameter>(op);
}
const Operator* MachineOperatorBuilder::S128Const(const uint8_t value[16]) {
return new (zone_) Operator1<S128ImmediateParameter>(
IrOpcode::kS128Const, Operator::kPure, "Immediate", 0, 0, 0, 1, 0, 0,
S128ImmediateParameter(value));
} }
const Operator* MachineOperatorBuilder::S8x16Shuffle( const Operator* MachineOperatorBuilder::S8x16Shuffle(
const uint8_t shuffle[16]) { const uint8_t shuffle[16]) {
return new (zone_) Operator1<S8x16ShuffleParameter>( return new (zone_) Operator1<S128ImmediateParameter>(
IrOpcode::kS8x16Shuffle, Operator::kPure, "Shuffle", 2, 0, 0, 1, 0, 0, IrOpcode::kS8x16Shuffle, Operator::kPure, "Shuffle", 2, 0, 0, 1, 0, 0,
S8x16ShuffleParameter(shuffle)); S128ImmediateParameter(shuffle));
} }
StackCheckKind StackCheckKindOf(Operator const* op) { StackCheckKind StackCheckKindOf(Operator const* op) {
......
...@@ -152,30 +152,30 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op) ...@@ -152,30 +152,30 @@ MachineRepresentation AtomicStoreRepresentationOf(Operator const* op)
MachineType AtomicOpType(Operator const* op) V8_WARN_UNUSED_RESULT; MachineType AtomicOpType(Operator const* op) V8_WARN_UNUSED_RESULT;
class S8x16ShuffleParameter { class S128ImmediateParameter {
public: public:
explicit S8x16ShuffleParameter(const uint8_t shuffle[16]) { explicit S128ImmediateParameter(const uint8_t immediate[16]) {
std::copy(shuffle, shuffle + 16, shuffle_.begin()); std::copy(immediate, immediate + 16, immediate_.begin());
} }
const std::array<uint8_t, 16>& shuffle() const { return shuffle_; } const std::array<uint8_t, 16>& immediate() const { return immediate_; }
const uint8_t* data() const { return shuffle_.data(); } const uint8_t* data() const { return immediate_.data(); }
uint8_t operator[](int x) const { return shuffle_[x]; } uint8_t operator[](int x) const { return immediate_[x]; }
private: private:
std::array<uint8_t, 16> shuffle_; std::array<uint8_t, 16> immediate_;
}; };
V8_EXPORT_PRIVATE bool operator==(S8x16ShuffleParameter const& lhs, V8_EXPORT_PRIVATE bool operator==(S128ImmediateParameter const& lhs,
S8x16ShuffleParameter const& rhs); S128ImmediateParameter const& rhs);
bool operator!=(S8x16ShuffleParameter const& lhs, bool operator!=(S128ImmediateParameter const& lhs,
S8x16ShuffleParameter const& rhs); S128ImmediateParameter const& rhs);
size_t hash_value(S8x16ShuffleParameter const& p); size_t hash_value(S128ImmediateParameter const& p);
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
S8x16ShuffleParameter const&); S128ImmediateParameter const&);
V8_EXPORT_PRIVATE S8x16ShuffleParameter const& S8x16ShuffleParameterOf( V8_EXPORT_PRIVATE S128ImmediateParameter const& S128ImmediateParameterOf(
Operator const* op) V8_WARN_UNUSED_RESULT; Operator const* op) V8_WARN_UNUSED_RESULT;
StackCheckKind StackCheckKindOf(Operator const* op) V8_WARN_UNUSED_RESULT; StackCheckKind StackCheckKindOf(Operator const* op) V8_WARN_UNUSED_RESULT;
...@@ -737,6 +737,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -737,6 +737,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* S128Load(); const Operator* S128Load();
const Operator* S128Store(); const Operator* S128Store();
const Operator* S128Const(const uint8_t value[16]);
const Operator* S128Zero(); const Operator* S128Zero();
const Operator* S128And(); const Operator* S128And();
......
...@@ -939,6 +939,7 @@ ...@@ -939,6 +939,7 @@
V(S128Load) \ V(S128Load) \
V(S128Store) \ V(S128Store) \
V(S128Zero) \ V(S128Zero) \
V(S128Const) \
V(S128Not) \ V(S128Not) \
V(S128And) \ V(S128And) \
V(S128Or) \ V(S128Or) \
......
...@@ -1657,7 +1657,7 @@ void SimdScalarLowering::LowerNode(Node* node) { ...@@ -1657,7 +1657,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
} }
case IrOpcode::kS8x16Shuffle: { case IrOpcode::kS8x16Shuffle: {
DCHECK_EQ(2, node->InputCount()); DCHECK_EQ(2, node->InputCount());
S8x16ShuffleParameter shuffle = S8x16ShuffleParameterOf(node->op()); S128ImmediateParameter shuffle = S128ImmediateParameterOf(node->op());
Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type); Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type);
Node** rep_right = GetReplacementsWithType(node->InputAt(1), rep_type); Node** rep_right = GetReplacementsWithType(node->InputAt(1), rep_type);
Node** rep_node = zone()->NewArray<Node*>(16); Node** rep_node = zone()->NewArray<Node*>(16);
......
...@@ -976,6 +976,11 @@ Node* WasmGraphBuilder::Float64Constant(double value) { ...@@ -976,6 +976,11 @@ Node* WasmGraphBuilder::Float64Constant(double value) {
return mcgraph()->Float64Constant(value); return mcgraph()->Float64Constant(value);
} }
// Creates a graph node holding a 128-bit SIMD constant (the 16 bytes of a
// wasm v128.const immediate) and records that this function uses SIMD.
Node* WasmGraphBuilder::Simd128Constant(const uint8_t value[16]) {
  // Seeing any SIMD constant means the function body uses SIMD.
  has_simd_ = true;
  const Operator* s128_const_op = mcgraph()->machine()->S128Const(value);
  return graph()->NewNode(s128_const_op);
}
namespace { namespace {
Node* Branch(MachineGraph* mcgraph, Node* cond, Node** true_node, Node* Branch(MachineGraph* mcgraph, Node* cond, Node** true_node,
Node** false_node, Node* control, BranchHint hint) { Node** false_node, Node* control, BranchHint hint) {
......
...@@ -199,6 +199,7 @@ class WasmGraphBuilder { ...@@ -199,6 +199,7 @@ class WasmGraphBuilder {
Node* IntPtrConstant(intptr_t value); Node* IntPtrConstant(intptr_t value);
Node* Float32Constant(float value); Node* Float32Constant(float value);
Node* Float64Constant(double value); Node* Float64Constant(double value);
Node* Simd128Constant(const uint8_t value[16]);
Node* Binop(wasm::WasmOpcode opcode, Node* left, Node* right, Node* Binop(wasm::WasmOpcode opcode, Node* left, Node* right,
wasm::WasmCodePosition position = wasm::kNoCodePosition); wasm::WasmCodePosition position = wasm::kNoCodePosition);
Node* Unop(wasm::WasmOpcode opcode, Node* input, Node* Unop(wasm::WasmOpcode opcode, Node* input,
......
...@@ -2947,8 +2947,13 @@ class LiftoffCompiler { ...@@ -2947,8 +2947,13 @@ class LiftoffCompiler {
} }
} }
// Decoder interface hook for the v128.const opcode. SIMD is not implemented
// for this path in Liftoff yet, so report the opcode as unsupported (which
// aborts Liftoff compilation of this function).
void S128Const(FullDecoder* decoder, const Simd128Immediate<validate>& imm,
               Value* result) {
  unsupported(decoder, kSimd, "simd");
}
void Simd8x16ShuffleOp(FullDecoder* decoder, void Simd8x16ShuffleOp(FullDecoder* decoder,
const Simd8x16ShuffleImmediate<validate>& imm, const Simd128Immediate<validate>& imm,
const Value& input0, const Value& input1, const Value& input0, const Value& input1,
Value* result) { Value* result) {
static constexpr RegClass result_rc = reg_class_for(ValueType::kS128); static constexpr RegClass result_rc = reg_class_for(ValueType::kS128);
...@@ -2956,7 +2961,7 @@ class LiftoffCompiler { ...@@ -2956,7 +2961,7 @@ class LiftoffCompiler {
LiftoffRegister lhs = __ PopToRegister(LiftoffRegList::ForRegs(rhs)); LiftoffRegister lhs = __ PopToRegister(LiftoffRegList::ForRegs(rhs));
LiftoffRegister dst = __ GetUnusedRegister(result_rc, {lhs, rhs}, {}); LiftoffRegister dst = __ GetUnusedRegister(result_rc, {lhs, rhs}, {});
__ LiftoffAssembler::emit_s8x16_shuffle(dst, lhs, rhs, imm.shuffle); __ LiftoffAssembler::emit_s8x16_shuffle(dst, lhs, rhs, imm.value);
__ PushRegister(kWasmS128, dst); __ PushRegister(kWasmS128, dst);
} }
......
...@@ -649,12 +649,12 @@ struct SimdLaneImmediate { ...@@ -649,12 +649,12 @@ struct SimdLaneImmediate {
// Immediate for SIMD S8x16 shuffle operations. // Immediate for SIMD S8x16 shuffle operations.
template <Decoder::ValidateFlag validate> template <Decoder::ValidateFlag validate>
struct Simd8x16ShuffleImmediate { struct Simd128Immediate {
uint8_t shuffle[kSimd128Size] = {0}; uint8_t value[kSimd128Size] = {0};
inline Simd8x16ShuffleImmediate(Decoder* decoder, const byte* pc) { inline Simd128Immediate(Decoder* decoder, const byte* pc) {
for (uint32_t i = 0; i < kSimd128Size; ++i) { for (uint32_t i = 0; i < kSimd128Size; ++i) {
shuffle[i] = decoder->read_u8<validate>(pc + i, "shuffle"); value[i] = decoder->read_u8<validate>(pc + i, "value");
} }
} }
}; };
...@@ -921,7 +921,8 @@ struct ControlBase { ...@@ -921,7 +921,8 @@ struct ControlBase {
F(SimdOp, WasmOpcode opcode, Vector<Value> args, Value* result) \ F(SimdOp, WasmOpcode opcode, Vector<Value> args, Value* result) \
F(SimdLaneOp, WasmOpcode opcode, const SimdLaneImmediate<validate>& imm, \ F(SimdLaneOp, WasmOpcode opcode, const SimdLaneImmediate<validate>& imm, \
const Vector<Value> inputs, Value* result) \ const Vector<Value> inputs, Value* result) \
F(Simd8x16ShuffleOp, const Simd8x16ShuffleImmediate<validate>& imm, \ F(S128Const, const Simd128Immediate<validate>& imm, Value* result) \
F(Simd8x16ShuffleOp, const Simd128Immediate<validate>& imm, \
const Value& input0, const Value& input1, Value* result) \ const Value& input0, const Value& input1, Value* result) \
F(Throw, const ExceptionIndexImmediate<validate>& imm, \ F(Throw, const ExceptionIndexImmediate<validate>& imm, \
const Vector<Value>& args) \ const Vector<Value>& args) \
...@@ -1307,11 +1308,10 @@ class WasmDecoder : public Decoder { ...@@ -1307,11 +1308,10 @@ class WasmDecoder : public Decoder {
} }
} }
inline bool Validate(const byte* pc, inline bool Validate(const byte* pc, Simd128Immediate<validate>& imm) {
Simd8x16ShuffleImmediate<validate>& imm) {
uint8_t max_lane = 0; uint8_t max_lane = 0;
for (uint32_t i = 0; i < kSimd128Size; ++i) { for (uint32_t i = 0; i < kSimd128Size; ++i) {
max_lane = std::max(max_lane, imm.shuffle[i]); max_lane = std::max(max_lane, imm.value[i]);
} }
// Shuffle indices must be in [0..31] for a 16 lane shuffle. // Shuffle indices must be in [0..31] for a 16 lane shuffle.
if (!VALIDATE(max_lane < 2 * kSimd128Size)) { if (!VALIDATE(max_lane < 2 * kSimd128Size)) {
...@@ -1632,6 +1632,7 @@ class WasmDecoder : public Decoder { ...@@ -1632,6 +1632,7 @@ class WasmDecoder : public Decoder {
return 1 + length + imm.length; return 1 + length + imm.length;
} }
// Shuffles require a byte per lane, or 16 immediate bytes. // Shuffles require a byte per lane, or 16 immediate bytes.
case kExprS128Const:
case kExprS8x16Shuffle: case kExprS8x16Shuffle:
return 1 + length + kSimd128Size; return 1 + length + kSimd128Size;
default: default:
...@@ -1815,6 +1816,8 @@ class WasmDecoder : public Decoder { ...@@ -1815,6 +1816,8 @@ class WasmDecoder : public Decoder {
FOREACH_SIMD_1_OPERAND_2_PARAM_OPCODE(DECLARE_OPCODE_CASE) FOREACH_SIMD_1_OPERAND_2_PARAM_OPCODE(DECLARE_OPCODE_CASE)
FOREACH_SIMD_MASK_OPERAND_OPCODE(DECLARE_OPCODE_CASE) FOREACH_SIMD_MASK_OPERAND_OPCODE(DECLARE_OPCODE_CASE)
return {2, 1}; return {2, 1};
FOREACH_SIMD_CONST_OPCODE(DECLARE_OPCODE_CASE)
return {0, 1};
default: { default: {
sig = WasmOpcodes::Signature(opcode); sig = WasmOpcodes::Signature(opcode);
if (sig) { if (sig) {
...@@ -3102,6 +3105,13 @@ class WasmFullDecoder : public WasmDecoder<validate> { ...@@ -3102,6 +3105,13 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return this->ok(); return this->ok();
} }
// Decodes a v128.const instruction: reads the 16 immediate bytes that follow
// the opcode, pushes an s128 value onto the abstract stack, and forwards the
// immediate to the compilation interface. Returns the total byte length
// consumed (opcode prefix + 16 immediate bytes).
uint32_t SimdConstOp(uint32_t opcode_length) {
  Simd128Immediate<validate> imm(this, this->pc_ + opcode_length);
  auto* result = Push(kWasmS128);
  CALL_INTERFACE_IF_REACHABLE(S128Const, imm, result);
  return opcode_length + kSimd128Size;
}
uint32_t SimdExtractLane(WasmOpcode opcode, ValueType type, uint32_t SimdExtractLane(WasmOpcode opcode, ValueType type,
uint32_t opcode_length) { uint32_t opcode_length) {
SimdLaneImmediate<validate> imm(this, this->pc_ + opcode_length); SimdLaneImmediate<validate> imm(this, this->pc_ + opcode_length);
...@@ -3130,7 +3140,7 @@ class WasmFullDecoder : public WasmDecoder<validate> { ...@@ -3130,7 +3140,7 @@ class WasmFullDecoder : public WasmDecoder<validate> {
} }
uint32_t Simd8x16ShuffleOp(uint32_t opcode_length) { uint32_t Simd8x16ShuffleOp(uint32_t opcode_length) {
Simd8x16ShuffleImmediate<validate> imm(this, this->pc_ + opcode_length); Simd128Immediate<validate> imm(this, this->pc_ + opcode_length);
if (this->Validate(this->pc_ + opcode_length, imm)) { if (this->Validate(this->pc_ + opcode_length, imm)) {
Value input1 = Pop(1, kWasmS128); Value input1 = Pop(1, kWasmS128);
Value input0 = Pop(0, kWasmS128); Value input0 = Pop(0, kWasmS128);
...@@ -3211,6 +3221,8 @@ class WasmFullDecoder : public WasmDecoder<validate> { ...@@ -3211,6 +3221,8 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return DecodeLoadTransformMem(LoadType::kI64Load32U, return DecodeLoadTransformMem(LoadType::kI64Load32U,
LoadTransformationKind::kExtend, LoadTransformationKind::kExtend,
opcode_length); opcode_length);
case kExprS128Const:
return SimdConstOp(opcode_length);
default: { default: {
if (!FLAG_wasm_simd_post_mvp && if (!FLAG_wasm_simd_post_mvp &&
WasmOpcodes::IsSimdPostMvpOpcode(opcode)) { WasmOpcodes::IsSimdPostMvpOpcode(opcode)) {
......
...@@ -262,6 +262,11 @@ class WasmGraphBuildingInterface { ...@@ -262,6 +262,11 @@ class WasmGraphBuildingInterface {
result->node = builder_->Float64Constant(value); result->node = builder_->Float64Constant(value);
} }
// Decoder interface hook for v128.const: forwards the 16 immediate bytes to
// the graph builder, which materializes them as an S128Const machine
// operator node.
void S128Const(FullDecoder* decoder, const Simd128Immediate<validate>& imm,
               Value* result) {
  result->node = builder_->Simd128Constant(imm.value);
}
void RefNull(FullDecoder* decoder, Value* result) { void RefNull(FullDecoder* decoder, Value* result) {
result->node = builder_->RefNull(); result->node = builder_->RefNull();
} }
...@@ -502,11 +507,11 @@ class WasmGraphBuildingInterface { ...@@ -502,11 +507,11 @@ class WasmGraphBuildingInterface {
} }
void Simd8x16ShuffleOp(FullDecoder* decoder, void Simd8x16ShuffleOp(FullDecoder* decoder,
const Simd8x16ShuffleImmediate<validate>& imm, const Simd128Immediate<validate>& imm,
const Value& input0, const Value& input1, const Value& input0, const Value& input1,
Value* result) { Value* result) {
TFNode* input_nodes[] = {input0.node, input1.node}; TFNode* input_nodes[] = {input0.node, input1.node};
result->node = BUILD(Simd8x16ShuffleOp, imm.shuffle, input_nodes); result->node = BUILD(Simd8x16ShuffleOp, imm.value, input_nodes);
} }
void Throw(FullDecoder* decoder, const ExceptionIndexImmediate<validate>& imm, void Throw(FullDecoder* decoder, const ExceptionIndexImmediate<validate>& imm,
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "src/utils/vector.h" #include "src/utils/vector.h"
#include "src/wasm/leb-helper.h" #include "src/wasm/leb-helper.h"
#include "src/wasm/local-decl-encoder.h" #include "src/wasm/local-decl-encoder.h"
#include "src/wasm/value-type.h"
#include "src/wasm/wasm-module.h" #include "src/wasm/wasm-module.h"
#include "src/wasm/wasm-opcodes.h" #include "src/wasm/wasm-opcodes.h"
#include "src/wasm/wasm-result.h" #include "src/wasm/wasm-result.h"
...@@ -172,6 +173,7 @@ class V8_EXPORT_PRIVATE WasmFunctionBuilder : public ZoneObject { ...@@ -172,6 +173,7 @@ class V8_EXPORT_PRIVATE WasmFunctionBuilder : public ZoneObject {
void EmitI64Const(int64_t val); void EmitI64Const(int64_t val);
void EmitF32Const(float val); void EmitF32Const(float val);
void EmitF64Const(double val); void EmitF64Const(double val);
void EmitS128Const(Simd128 val);
void EmitWithU8(WasmOpcode opcode, const byte immediate); void EmitWithU8(WasmOpcode opcode, const byte immediate);
void EmitWithU8U8(WasmOpcode opcode, const byte imm1, const byte imm2); void EmitWithU8U8(WasmOpcode opcode, const byte imm1, const byte imm2);
void EmitWithI32V(WasmOpcode opcode, int32_t immediate); void EmitWithI32V(WasmOpcode opcode, int32_t immediate);
......
...@@ -172,6 +172,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -172,6 +172,7 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIGN_OP(INT, LoadMem16, "load16") CASE_SIGN_OP(INT, LoadMem16, "load16")
CASE_SIGN_OP(I64, LoadMem32, "load32") CASE_SIGN_OP(I64, LoadMem32, "load32")
CASE_S128_OP(LoadMem, "load128") CASE_S128_OP(LoadMem, "load128")
CASE_S128_OP(Const, "const")
CASE_ALL_OP(StoreMem, "store") CASE_ALL_OP(StoreMem, "store")
CASE_INT_OP(StoreMem8, "store8") CASE_INT_OP(StoreMem8, "store8")
CASE_INT_OP(StoreMem16, "store16") CASE_INT_OP(StoreMem16, "store16")
......
...@@ -283,6 +283,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&); ...@@ -283,6 +283,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(S64x2LoadSplat, 0xfd0a, s_i) \ V(S64x2LoadSplat, 0xfd0a, s_i) \
V(S128StoreMem, 0xfd0b, v_is) V(S128StoreMem, 0xfd0b, v_is)
#define FOREACH_SIMD_CONST_OPCODE(V) V(S128Const, 0xfd0c, _)
#define FOREACH_SIMD_MASK_OPERAND_OPCODE(V) V(S8x16Shuffle, 0xfd0d, s_ss) #define FOREACH_SIMD_MASK_OPERAND_OPCODE(V) V(S8x16Shuffle, 0xfd0d, s_ss)
#define FOREACH_SIMD_MVP_0_OPERAND_OPCODE(V) \ #define FOREACH_SIMD_MVP_0_OPERAND_OPCODE(V) \
...@@ -639,6 +641,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&); ...@@ -639,6 +641,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
FOREACH_SIMD_1_OPERAND_OPCODE(V) \ FOREACH_SIMD_1_OPERAND_OPCODE(V) \
FOREACH_SIMD_MASK_OPERAND_OPCODE(V) \ FOREACH_SIMD_MASK_OPERAND_OPCODE(V) \
FOREACH_SIMD_MEM_OPCODE(V) \ FOREACH_SIMD_MEM_OPCODE(V) \
FOREACH_SIMD_CONST_OPCODE(V) \
FOREACH_ATOMIC_OPCODE(V) \ FOREACH_ATOMIC_OPCODE(V) \
FOREACH_ATOMIC_0_OPERAND_OPCODE(V) \ FOREACH_ATOMIC_0_OPERAND_OPCODE(V) \
FOREACH_NUMERIC_OPCODE(V) \ FOREACH_NUMERIC_OPCODE(V) \
......
...@@ -3677,6 +3677,47 @@ WASM_SIMD_TEST(BitSelect) { ...@@ -3677,6 +3677,47 @@ WASM_SIMD_TEST(BitSelect) {
DCHECK_EQ(0x01020304, r.Call(0xFFFFFFFF)); DCHECK_EQ(0x01020304, r.Call(0xFFFFFFFF));
} }
// Builds and runs a wasm function that materializes `expected` via
// v128.const, stores it to an s128 global, then checks that the global's
// bytes match `expected` lane by lane.
void RunSimdConstTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                      const std::array<uint8_t, kSimd128Size>& expected) {
  WasmRunner<uint32_t> r(execution_tier, lower_simd);
  byte temp1 = r.AllocateLocal(kWasmS128);
  uint8_t* src0 = r.builder().AddGlobal<uint8_t>(kWasmS128);
  // NOTE(review): temp1 is a *local* index but is passed to WASM_SET_GLOBAL
  // as the *global* index; both happen to be 0 here — confirm this is the
  // intended usage rather than an accidental index mix-up.
  BUILD(r, WASM_SET_GLOBAL(temp1, WASM_SIMD_CONSTANT(expected)), WASM_ONE);
  CHECK_EQ(1, r.Call());
  for (size_t i = 0; i < expected.size(); i++) {
    CHECK_EQ(ReadLittleEndianValue<uint8_t>(&src0[i]), expected[i]);
  }
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(S128Const) {
  // Exercise a fully generic constant: each lane holds its own index, so
  // neither the all-zeros nor the all-ones fast path applies.
  std::array<uint8_t, kSimd128Size> expected;
  uint8_t next_lane_value = 0;
  for (uint8_t& lane : expected) lane = next_lane_value++;
  RunSimdConstTest(execution_tier, lower_simd, expected);
}
WASM_SIMD_TEST_NO_LOWERING(S128ConstAllZero) {
  // All-zero constants are special-cased by the instruction selector
  // (emitted as S128Zero rather than a generic constant).
  std::array<uint8_t, kSimd128Size> expected;
  expected.fill(0);
  RunSimdConstTest(execution_tier, lower_simd, expected);
}
WASM_SIMD_TEST_NO_LOWERING(S128ConstAllOnes) {
  // All-ones constants are special-cased on x64 (emitted as S128AllOnes,
  // i.e. pcmpeqd dst,dst, rather than a generic constant).
  std::array<uint8_t, kSimd128Size> expected;
  expected.fill(0xff);
  RunSimdConstTest(execution_tier, lower_simd, expected);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
void RunI8x16MixedRelationalOpTest(ExecutionTier execution_tier, void RunI8x16MixedRelationalOpTest(ExecutionTier execution_tier,
LowerSimd lower_simd, WasmOpcode opcode, LowerSimd lower_simd, WasmOpcode opcode,
Int8BinOp expected_op) { Int8BinOp expected_op) {
......
...@@ -2549,6 +2549,17 @@ class WasmInterpreterInternals { ...@@ -2549,6 +2549,17 @@ class WasmInterpreterInternals {
Push(WasmValue(Simd128(res))); Push(WasmValue(Simd128(res)));
return true; return true;
} }
case kExprS128Const: {
Simd128Immediate<Decoder::kNoValidate> imm(decoder,
code->at(pc + *len));
int16 res;
for (size_t i = 0; i < kSimd128Size; ++i) {
res.val[LANE(i, res)] = imm.value[i];
}
Push(WasmValue(Simd128(res)));
*len += 16;
return true;
}
case kExprS8x16Swizzle: { case kExprS8x16Swizzle: {
int16 v2 = Pop().to_s128().to_i8x16(); int16 v2 = Pop().to_s128().to_i8x16();
int16 v1 = Pop().to_s128().to_i8x16(); int16 v1 = Pop().to_s128().to_i8x16();
...@@ -2562,14 +2573,14 @@ class WasmInterpreterInternals { ...@@ -2562,14 +2573,14 @@ class WasmInterpreterInternals {
return true; return true;
} }
case kExprS8x16Shuffle: { case kExprS8x16Shuffle: {
Simd8x16ShuffleImmediate<Decoder::kNoValidate> imm(decoder, Simd128Immediate<Decoder::kNoValidate> imm(decoder,
code->at(pc + *len)); code->at(pc + *len));
*len += 16; *len += 16;
int16 v2 = Pop().to_s128().to_i8x16(); int16 v2 = Pop().to_s128().to_i8x16();
int16 v1 = Pop().to_s128().to_i8x16(); int16 v1 = Pop().to_s128().to_i8x16();
int16 res; int16 res;
for (size_t i = 0; i < kSimd128Size; ++i) { for (size_t i = 0; i < kSimd128Size; ++i) {
int lane = imm.shuffle[i]; int lane = imm.value[i];
res.val[LANE(i, v1)] = lane < kSimd128Size res.val[LANE(i, v1)] = lane < kSimd128Size
? v1.val[LANE(lane, v1)] ? v1.val[LANE(lane, v1)]
: v2.val[LANE(lane - kSimd128Size, v1)]; : v2.val[LANE(lane - kSimd128Size, v1)];
......
...@@ -792,6 +792,12 @@ inline WasmOpcode LoadStoreOpcodeOf(MachineType type, bool store) { ...@@ -792,6 +792,12 @@ inline WasmOpcode LoadStoreOpcodeOf(MachineType type, bool store) {
#define WASM_SIMD_CONCAT_OP(op, bytes, x, y) \ #define WASM_SIMD_CONCAT_OP(op, bytes, x, y) \
x, y, WASM_SIMD_OP(op), TO_BYTE(bytes) x, y, WASM_SIMD_OP(op), TO_BYTE(bytes)
#define WASM_SIMD_SELECT(format, x, y, z) x, y, z, WASM_SIMD_OP(kExprS128Select) #define WASM_SIMD_SELECT(format, x, y, z) x, y, z, WASM_SIMD_OP(kExprS128Select)
// Emits the bytecode for a v128.const instruction: the SIMD-prefixed opcode
// followed by the 16 immediate bytes v[0]..v[15] (v must support
// operator[]).
#define WASM_SIMD_CONSTANT(v) \
WASM_SIMD_OP(kExprS128Const), TO_BYTE(v[0]), TO_BYTE(v[1]), TO_BYTE(v[2]), \
TO_BYTE(v[3]), TO_BYTE(v[4]), TO_BYTE(v[5]), TO_BYTE(v[6]), \
TO_BYTE(v[7]), TO_BYTE(v[8]), TO_BYTE(v[9]), TO_BYTE(v[10]), \
TO_BYTE(v[11]), TO_BYTE(v[12]), TO_BYTE(v[13]), TO_BYTE(v[14]), \
TO_BYTE(v[15])
#define WASM_SIMD_F64x2_SPLAT(x) WASM_SIMD_SPLAT(F64x2, x) #define WASM_SIMD_F64x2_SPLAT(x) WASM_SIMD_SPLAT(F64x2, x)
#define WASM_SIMD_F64x2_EXTRACT_LANE(lane, x) \ #define WASM_SIMD_F64x2_EXTRACT_LANE(lane, x) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment