Commit 673be63e authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Prototype load lane

Load lane loads a value from memory and replaces a single lane of a
simd value.

This implements the load (no stores yet) for x64 and interpreter.

Bug: v8:10975
Change-Id: I95d1b5e781ee9adaec23dda749e514f2485eda10
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2444578
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70456}
parent 2c38a477
...@@ -1412,6 +1412,10 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -1412,6 +1412,10 @@ void InstructionSelector::VisitNode(Node* node) {
MarkAsRepresentation(MachineRepresentation::kSimd128, node); MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitLoadTransform(node); return VisitLoadTransform(node);
} }
case IrOpcode::kLoadLane: {
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitLoadLane(node);
}
case IrOpcode::kPoisonedLoad: { case IrOpcode::kPoisonedLoad: {
LoadRepresentation type = LoadRepresentationOf(node->op()); LoadRepresentation type = LoadRepresentationOf(node->op());
MarkAsRepresentation(type.representation(), node); MarkAsRepresentation(type.representation(), node);
...@@ -2693,6 +2697,11 @@ void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) { ...@@ -2693,6 +2697,11 @@ void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
} }
#endif // !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_ARM64
// TODO(v8:10975): Prototyping load lane and store lane.
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) { void InstructionSelector::VisitParameter(Node* node) {
......
...@@ -3243,6 +3243,44 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3243,6 +3243,44 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
} }
case kX64Pinsrb: {
// TODO(zhin): consolidate this opcode with the other usages, like
// ReplaceLane, by implementing support when this has no addressing mode.
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrb(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
case kX64Pinsrw: {
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrw(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
case kX64Pinsrd: {
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrd(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
case kX64Pinsrq: {
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrq(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
case kX64I8x16SConvertI16x8: { case kX64I8x16SConvertI16x8: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
......
...@@ -281,6 +281,10 @@ namespace compiler { ...@@ -281,6 +281,10 @@ namespace compiler {
V(X64I8x16ExtractLaneU) \ V(X64I8x16ExtractLaneU) \
V(X64I8x16ExtractLaneS) \ V(X64I8x16ExtractLaneS) \
V(X64I8x16ReplaceLane) \ V(X64I8x16ReplaceLane) \
V(X64Pinsrb) \
V(X64Pinsrw) \
V(X64Pinsrd) \
V(X64Pinsrq) \
V(X64I8x16SConvertI16x8) \ V(X64I8x16SConvertI16x8) \
V(X64I8x16Neg) \ V(X64I8x16Neg) \
V(X64I8x16Shl) \ V(X64I8x16Shl) \
......
...@@ -253,6 +253,10 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -253,6 +253,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I8x16ExtractLaneU: case kX64I8x16ExtractLaneU:
case kX64I8x16ExtractLaneS: case kX64I8x16ExtractLaneS:
case kX64I8x16ReplaceLane: case kX64I8x16ReplaceLane:
case kX64Pinsrb:
case kX64Pinsrw:
case kX64Pinsrd:
case kX64Pinsrq:
case kX64I8x16SConvertI16x8: case kX64I8x16SConvertI16x8:
case kX64I8x16Neg: case kX64I8x16Neg:
case kX64I8x16Shl: case kX64I8x16Shl:
......
...@@ -5,8 +5,10 @@ ...@@ -5,8 +5,10 @@
#include <algorithm> #include <algorithm>
#include "src/base/iterator.h" #include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/overflowing-math.h" #include "src/base/overflowing-math.h"
#include "src/compiler/backend/instruction-selector-impl.h" #include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h" #include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h" #include "src/compiler/node-properties.h"
#include "src/roots/roots-inl.h" #include "src/roots/roots-inl.h"
...@@ -342,6 +344,44 @@ void InstructionSelector::VisitAbortCSAAssert(Node* node) { ...@@ -342,6 +344,44 @@ void InstructionSelector::VisitAbortCSAAssert(Node* node) {
Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), rdx)); Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), rdx));
} }
// Selects the x64 instruction for a LoadLane node: a pinsr{b,w,d,q} whose
// source is a memory operand, so the load and the lane replacement happen in
// a single instruction.
void InstructionSelector::VisitLoadLane(Node* node) {
  LoadLaneParameters params = LoadLaneParametersOf(node->op());

  // Map the width of the loaded lane onto the matching insert opcode.
  const auto select_insert_opcode = [&params]() -> InstructionCode {
    if (params.rep == MachineType::Int8()) return kX64Pinsrb;
    if (params.rep == MachineType::Int16()) return kX64Pinsrw;
    if (params.rep == MachineType::Int32()) return kX64Pinsrd;
    if (params.rep == MachineType::Int64()) return kX64Pinsrq;
    UNREACHABLE();
  };
  InstructionCode code = select_insert_opcode();

  X64OperandGenerator g(this);
  InstructionOperand outputs[] = {g.DefineAsRegister(node)};

  // Operand layout: up to 3 operands produced by
  // GetEffectiveAddressMemoryOperand, followed by the lane index immediate
  // and finally the vector whose lane gets replaced.
  InstructionOperand inputs[5];
  size_t num_inputs = 0;
  const AddressingMode addressing_mode =
      g.GetEffectiveAddressMemoryOperand(node, inputs, &num_inputs);
  code |= AddressingModeField::encode(addressing_mode);

  inputs[num_inputs++] = g.UseImmediate(params.laneidx);
  inputs[num_inputs++] = g.UseRegister(node->InputAt(2));
  DCHECK_GE(5, num_inputs);

  // x64 supports unaligned loads.
  DCHECK_NE(params.kind, LoadKind::kUnaligned);
  if (params.kind == LoadKind::kProtected) {
    code |= MiscField::encode(kMemoryAccessProtected);
  }

  Emit(code, 1, outputs, num_inputs, inputs);
}
void InstructionSelector::VisitLoadTransform(Node* node) { void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op()); LoadTransformParameters params = LoadTransformParametersOf(node->op());
ArchOpcode opcode = kArchNop; ArchOpcode opcode = kArchNop;
......
...@@ -99,6 +99,25 @@ bool operator!=(LoadTransformParameters lhs, LoadTransformParameters rhs) { ...@@ -99,6 +99,25 @@ bool operator!=(LoadTransformParameters lhs, LoadTransformParameters rhs) {
return !(lhs == rhs); return !(lhs == rhs);
} }
// Hashes a LoadLaneParameters by combining all three of its fields.
size_t hash_value(LoadLaneParameters params) {
  const size_t combined =
      base::hash_combine(params.kind, params.rep, params.laneidx);
  return combined;
}
// Prints the parameters as "(<kind> <rep> <laneidx>)".
std::ostream& operator<<(std::ostream& os, LoadLaneParameters params) {
  // laneidx is a uint8_t, which streams as a character; widen it so the lane
  // index is printed as a number.
  return os << "(" << params.kind << " " << params.rep << " "
            << static_cast<uint32_t>(params.laneidx) << ")";
}
// Returns the LoadLaneParameters stored on |op|, which must be a LoadLane
// operator.
LoadLaneParameters const& LoadLaneParametersOf(Operator const* op) {
  DCHECK_EQ(IrOpcode::kLoadLane, op->opcode());
  LoadLaneParameters const& parameters = OpParameter<LoadLaneParameters>(op);
  return parameters;
}
// Two LoadLaneParameters are equal when kind, representation and lane index
// all match.
bool operator==(LoadLaneParameters lhs, LoadLaneParameters rhs) {
  if (!(lhs.kind == rhs.kind)) return false;
  if (!(lhs.rep == rhs.rep)) return false;
  return lhs.laneidx == rhs.laneidx;
}
LoadRepresentation LoadRepresentationOf(Operator const* op) { LoadRepresentation LoadRepresentationOf(Operator const* op) {
DCHECK(IrOpcode::kLoad == op->opcode() || DCHECK(IrOpcode::kLoad == op->opcode() ||
IrOpcode::kProtectedLoad == op->opcode() || IrOpcode::kProtectedLoad == op->opcode() ||
...@@ -602,6 +621,15 @@ ShiftKind ShiftKindOf(Operator const* op) { ...@@ -602,6 +621,15 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I16x8, 8) \ V(I16x8, 8) \
V(I8x16, 16) V(I8x16, 16)
// Lane-index lists: each macro applies V to every lane index of the named
// shape (2, 4, 8 and 16 indices respectively). Each wider list is built by
// extending the next narrower one.
#define SIMD_I64x2_LANES(V) V(0) V(1)
#define SIMD_I32x4_LANES(V) SIMD_I64x2_LANES(V) V(2) V(3)
#define SIMD_I16x8_LANES(V) SIMD_I32x4_LANES(V) V(4) V(5) V(6) V(7)
#define SIMD_I8x16_LANES(V) \
SIMD_I16x8_LANES(V) V(8) V(9) V(10) V(11) V(12) V(13) V(14) V(15)
#define STACK_SLOT_CACHED_SIZES_ALIGNMENTS_LIST(V) \ #define STACK_SLOT_CACHED_SIZES_ALIGNMENTS_LIST(V) \
V(4, 0) V(8, 0) V(16, 0) V(4, 4) V(8, 8) V(16, 16) V(4, 0) V(8, 0) V(16, 0) V(4, 4) V(8, 8) V(16, 16)
...@@ -752,6 +780,19 @@ struct LoadTransformOperator : public Operator1<LoadTransformParameters> { ...@@ -752,6 +780,19 @@ struct LoadTransformOperator : public Operator1<LoadTransformParameters> {
LoadTransformParameters{kind, type}) {} LoadTransformParameters{kind, type}) {}
}; };
// Operator for IrOpcode::kLoadLane, parameterized at compile time on the load
// kind, the loaded representation/semantic and the lane index. The template
// arguments are stored as the operator's LoadLaneParameters.
// Protected loads are marked kNoDeopt | kNoThrow; all other kinds are
// kEliminatable.
// NOTE(review): "3, 1, 1, 1, 1, 0" are presumably the value/effect/control
// input counts followed by the value/effect/control output counts — confirm
// against the Operator1 constructor.
template <LoadKind kind, MachineRepresentation rep, MachineSemantic sem,
uint8_t laneidx>
struct LoadLaneOperator : public Operator1<LoadLaneParameters> {
LoadLaneOperator()
: Operator1(
IrOpcode::kLoadLane,
kind == LoadKind::kProtected
? Operator::kNoDeopt | Operator::kNoThrow
: Operator::kEliminatable,
"LoadLane", 3, 1, 1, 1, 1, 0,
LoadLaneParameters{kind, LoadRepresentation(rep, sem), laneidx}) {}
};
template <MachineRepresentation rep, WriteBarrierKind write_barrier_kind> template <MachineRepresentation rep, WriteBarrierKind write_barrier_kind>
struct StoreOperator : public Operator1<StoreRepresentation> { struct StoreOperator : public Operator1<StoreRepresentation> {
StoreOperator() StoreOperator()
...@@ -1123,6 +1164,40 @@ const Operator* MachineOperatorBuilder::LoadTransform( ...@@ -1123,6 +1164,40 @@ const Operator* MachineOperatorBuilder::LoadTransform(
UNREACHABLE(); UNREACHABLE();
} }
// Returns the cached LoadLane operator for the given load kind, lane
// representation and lane index. Only Int8/Int16/Int32/Int64 representations
// with a lane index valid for that width are supported; any other combination
// hits UNREACHABLE().
const Operator* MachineOperatorBuilder::LoadLane(LoadKind kind,
                                                 LoadRepresentation rep,
                                                 uint8_t laneidx) {
#define LOAD_LANE_KIND(TYPE, KIND, LANEIDX)                      \
  if (kind == LoadKind::k##KIND && rep == MachineType::TYPE() && \
      laneidx == LANEIDX) {                                      \
    return GetCachedOperator<LoadLaneOperator<                   \
        LoadKind::k##KIND, MachineType::TYPE().representation(), \
        MachineType::TYPE().semantic(), LANEIDX>>();             \
  }

#define LOAD_LANE_T(T, LANE)         \
  LOAD_LANE_KIND(T, Normal, LANE)    \
  LOAD_LANE_KIND(T, Unaligned, LANE) \
  LOAD_LANE_KIND(T, Protected, LANE)

#define LOAD_LANE_INT8(LANE) LOAD_LANE_T(Int8, LANE)
#define LOAD_LANE_INT16(LANE) LOAD_LANE_T(Int16, LANE)
#define LOAD_LANE_INT32(LANE) LOAD_LANE_T(Int32, LANE)
#define LOAD_LANE_INT64(LANE) LOAD_LANE_T(Int64, LANE)

  // Semicolons unnecessary, but helps formatting.
  SIMD_I8x16_LANES(LOAD_LANE_INT8);
  SIMD_I16x8_LANES(LOAD_LANE_INT16);
  SIMD_I32x4_LANES(LOAD_LANE_INT32);
  SIMD_I64x2_LANES(LOAD_LANE_INT64);

  // Undefine every helper macro introduced above (including LOAD_LANE_T) so
  // none of them leaks into the rest of the translation unit.
#undef LOAD_LANE_INT8
#undef LOAD_LANE_INT16
#undef LOAD_LANE_INT32
#undef LOAD_LANE_INT64
#undef LOAD_LANE_T
#undef LOAD_LANE_KIND
  UNREACHABLE();
}
const Operator* MachineOperatorBuilder::StackSlot(int size, int alignment) { const Operator* MachineOperatorBuilder::StackSlot(int size, int alignment) {
DCHECK_LE(0, size); DCHECK_LE(0, size);
DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || alignment == 16); DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || alignment == 16);
......
...@@ -91,6 +91,17 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, ...@@ -91,6 +91,17 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
V8_EXPORT_PRIVATE LoadTransformParameters const& LoadTransformParametersOf( V8_EXPORT_PRIVATE LoadTransformParameters const& LoadTransformParametersOf(
Operator const*) V8_WARN_UNUSED_RESULT; Operator const*) V8_WARN_UNUSED_RESULT;
// Parameters attached to a LoadLane operator.
struct LoadLaneParameters {
// Kind of memory access (a LoadKind value such as kProtected or kUnaligned).
LoadKind kind;
// Representation of the value loaded from memory.
LoadRepresentation rep;
// Index of the SIMD lane that is replaced by the loaded value.
uint8_t laneidx;
};
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, LoadLaneParameters);
V8_EXPORT_PRIVATE LoadLaneParameters const& LoadLaneParametersOf(
Operator const*) V8_WARN_UNUSED_RESULT;
// A Store needs a MachineType and a WriteBarrierKind in order to emit the // A Store needs a MachineType and a WriteBarrierKind in order to emit the
// correct write barrier. // correct write barrier.
class StoreRepresentation final { class StoreRepresentation final {
...@@ -780,6 +791,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -780,6 +791,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* LoadTransform(LoadKind kind, LoadTransformation transform); const Operator* LoadTransform(LoadKind kind, LoadTransformation transform);
// SIMD load: replace a specified lane with [base + index].
const Operator* LoadLane(LoadKind kind, LoadRepresentation rep,
uint8_t laneidx);
// store [base + index], value // store [base + index], value
const Operator* Store(StoreRepresentation rep); const Operator* Store(StoreRepresentation rep);
const Operator* ProtectedStore(MachineRepresentation rep); const Operator* ProtectedStore(MachineRepresentation rep);
......
...@@ -956,7 +956,8 @@ ...@@ -956,7 +956,8 @@
V(V16x8AllTrue) \ V(V16x8AllTrue) \
V(V8x16AnyTrue) \ V(V8x16AnyTrue) \
V(V8x16AllTrue) \ V(V8x16AllTrue) \
V(LoadTransform) V(LoadTransform) \
V(LoadLane)
#define VALUE_OP_LIST(V) \ #define VALUE_OP_LIST(V) \
COMMON_OP_LIST(V) \ COMMON_OP_LIST(V) \
......
...@@ -3887,6 +3887,7 @@ LoadTransformation GetLoadTransformation( ...@@ -3887,6 +3887,7 @@ LoadTransformation GetLoadTransformation(
} else if (memtype == MachineType::Int64()) { } else if (memtype == MachineType::Int64()) {
return LoadTransformation::kS128LoadMem64Zero; return LoadTransformation::kS128LoadMem64Zero;
} }
break;
} }
} }
UNREACHABLE(); UNREACHABLE();
...@@ -3983,6 +3984,33 @@ Node* WasmGraphBuilder::LoadTransformBigEndian( ...@@ -3983,6 +3984,33 @@ Node* WasmGraphBuilder::LoadTransformBigEndian(
} }
#endif #endif
// Builds a LoadLane node: loads a |memtype|-sized value from memory at
// |index| + |offset| and replaces lane |laneidx| of |value| with it. The
// returned node is installed as the current effect.
Node* WasmGraphBuilder::LoadLane(MachineType memtype, Node* value, Node* index,
                                 uint32_t offset, uint8_t laneidx,
                                 wasm::WasmCodePosition position) {
  has_simd_ = true;

  // Only the bytes of a single lane are accessed, so bounds-check with the
  // memory type's size.
  const uint8_t lane_bytes = memtype.MemSize();
  index =
      BoundsCheckMem(lane_bytes, index, offset, position, kCanOmitBoundsCheck);

  const LoadKind load_kind =
      GetLoadKind(mcgraph(), memtype, use_trap_handler());
  Node* const load_node = SetEffect(graph()->NewNode(
      mcgraph()->machine()->LoadLane(load_kind, memtype, laneidx),
      MemBuffer(offset), index, value, effect(), control()));

  // Only protected loads get a source position attached.
  if (load_kind == LoadKind::kProtected) {
    SetSourcePosition(load_node, position);
  }

  if (FLAG_trace_wasm_memory) {
    TraceMemoryOperation(false, memtype.representation(), index, offset,
                         position);
  }

  return load_node;
}
Node* WasmGraphBuilder::LoadTransform(wasm::ValueType type, MachineType memtype, Node* WasmGraphBuilder::LoadTransform(wasm::ValueType type, MachineType memtype,
wasm::LoadTransformationKind transform, wasm::LoadTransformationKind transform,
Node* index, uint64_t offset, Node* index, uint64_t offset,
......
...@@ -320,6 +320,8 @@ class WasmGraphBuilder { ...@@ -320,6 +320,8 @@ class WasmGraphBuilder {
wasm::LoadTransformationKind transform, Node* index, wasm::LoadTransformationKind transform, Node* index,
uint64_t offset, uint32_t alignment, uint64_t offset, uint32_t alignment,
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
Node* LoadLane(MachineType memtype, Node* value, Node* index, uint32_t offset,
uint8_t laneidx, wasm::WasmCodePosition position);
Node* StoreMem(MachineRepresentation mem_rep, Node* index, uint64_t offset, Node* StoreMem(MachineRepresentation mem_rep, Node* index, uint64_t offset,
uint32_t alignment, Node* val, wasm::WasmCodePosition position, uint32_t alignment, Node* val, wasm::WasmCodePosition position,
wasm::ValueType type); wasm::ValueType type);
......
...@@ -2331,6 +2331,12 @@ class LiftoffCompiler { ...@@ -2331,6 +2331,12 @@ class LiftoffCompiler {
} }
} }
// SIMD load lane is not implemented in Liftoff; this reports the operation as
// unsupported (reason kSimd) and generates nothing for it.
void LoadLane(FullDecoder* decoder, LoadType type, const Value& value,
const Value& index, const MemoryAccessImmediate<validate>& imm,
const uint8_t laneidx, Value* result) {
unsupported(decoder, kSimd, "simd load lane");
}
void StoreMem(FullDecoder* decoder, StoreType type, void StoreMem(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm, const MemoryAccessImmediate<validate>& imm,
const Value& index_val, const Value& value_val) { const Value& index_val, const Value& value_val) {
......
...@@ -978,6 +978,9 @@ struct ControlBase : public PcForErrors<validate> { ...@@ -978,6 +978,9 @@ struct ControlBase : public PcForErrors<validate> {
F(LoadTransform, LoadType type, LoadTransformationKind transform, \ F(LoadTransform, LoadType type, LoadTransformationKind transform, \
const MemoryAccessImmediate<validate>& imm, const Value& index, \ const MemoryAccessImmediate<validate>& imm, const Value& index, \
Value* result) \ Value* result) \
F(LoadLane, LoadType type, const Value& value, const Value& index, \
const MemoryAccessImmediate<validate>& imm, const uint8_t laneidx, \
Value* result) \
F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \ F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \
const Value& index, const Value& value) \ const Value& index, const Value& value) \
F(CurrentMemoryPages, Value* result) \ F(CurrentMemoryPages, Value* result) \
...@@ -1733,13 +1736,27 @@ class WasmDecoder : public Decoder { ...@@ -1733,13 +1736,27 @@ class WasmDecoder : public Decoder {
return 2 + length; return 2 + length;
#define DECLARE_OPCODE_CASE(name, opcode, sig) case kExpr##name: #define DECLARE_OPCODE_CASE(name, opcode, sig) case kExpr##name:
FOREACH_SIMD_MEM_OPCODE(DECLARE_OPCODE_CASE) FOREACH_SIMD_MEM_OPCODE(DECLARE_OPCODE_CASE)
FOREACH_SIMD_POST_MVP_MEM_OPCODE(DECLARE_OPCODE_CASE)
#undef DECLARE_OPCODE_CASE #undef DECLARE_OPCODE_CASE
{ {
MemoryAccessImmediate<validate> imm(decoder, pc + length + 1, MemoryAccessImmediate<validate> imm(decoder, pc + length + 1,
UINT32_MAX); UINT32_MAX);
return 1 + length + imm.length; return 1 + length + imm.length;
} }
case kExprS128LoadMem32Zero:
case kExprS128LoadMem64Zero: {
MemoryAccessImmediate<validate> imm(decoder, pc + length + 1,
UINT32_MAX);
return 1 + length + imm.length;
}
case kExprS128Load8Lane:
case kExprS128Load16Lane:
case kExprS128Load32Lane:
case kExprS128Load64Lane: {
MemoryAccessImmediate<validate> imm(decoder, pc + length + 1,
UINT32_MAX);
// 1 more byte for lane index immediate.
return 1 + length + imm.length + 1;
}
// Shuffles require a byte per lane, or 16 immediate bytes. // Shuffles require a byte per lane, or 16 immediate bytes.
case kExprS128Const: case kExprS128Const:
case kExprI8x16Shuffle: case kExprI8x16Shuffle:
...@@ -3331,6 +3348,21 @@ class WasmFullDecoder : public WasmDecoder<validate> { ...@@ -3331,6 +3348,21 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return opcode_length + imm.length; return opcode_length + imm.length;
} }
// Decodes a SIMD load-lane instruction. The immediates are a memory access
// immediate followed by a one-byte lane index. Pops the v128 operand and the
// i32 memory index, pushes the resulting v128 and forwards to the interface.
// Returns the total instruction length, or 0 on a decode error.
int DecodeLoadLane(LoadType type, uint32_t opcode_length) {
  if (!CheckHasMemory()) return 0;
  MemoryAccessImmediate<validate> mem_imm(this, this->pc_ + opcode_length,
                                          type.size_log_2());
  const auto lane_pc = this->pc_ + opcode_length + mem_imm.length;
  SimdLaneImmediate<validate> lane_imm(this, lane_pc);
  // A 128-bit vector holds (16 >> size_log_2) lanes of the accessed width.
  // Reject larger lane indices here; otherwise they reach the backends, which
  // only handle in-range lanes.
  const uint32_t lane_count = 16u >> type.size_log_2();
  if (validate && lane_imm.lane >= lane_count) {
    this->error(lane_pc, "invalid lane index");
    return 0;
  }
  Value v128 = Pop(1, kWasmS128);
  Value index = Pop(0, kWasmI32);

  Value* result = Push(kWasmS128);
  CALL_INTERFACE_IF_REACHABLE(LoadLane, type, v128, index, mem_imm,
                              lane_imm.lane, result);
  return opcode_length + mem_imm.length + lane_imm.length;
}
int DecodeStoreMem(StoreType store, int prefix_len = 1) { int DecodeStoreMem(StoreType store, int prefix_len = 1) {
if (!CheckHasMemory()) return 0; if (!CheckHasMemory()) return 0;
MemoryAccessImmediate<validate> imm(this, this->pc_ + prefix_len, MemoryAccessImmediate<validate> imm(this, this->pc_ + prefix_len,
...@@ -3562,6 +3594,18 @@ class WasmFullDecoder : public WasmDecoder<validate> { ...@@ -3562,6 +3594,18 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return DecodeLoadTransformMem(LoadType::kI64Load32U, return DecodeLoadTransformMem(LoadType::kI64Load32U,
LoadTransformationKind::kExtend, LoadTransformationKind::kExtend,
opcode_length); opcode_length);
case kExprS128Load8Lane: {
return DecodeLoadLane(LoadType::kI32Load8S, opcode_length);
}
case kExprS128Load16Lane: {
return DecodeLoadLane(LoadType::kI32Load16S, opcode_length);
}
case kExprS128Load32Lane: {
return DecodeLoadLane(LoadType::kI32Load, opcode_length);
}
case kExprS128Load64Lane: {
return DecodeLoadLane(LoadType::kI64Load, opcode_length);
}
case kExprS128Const: case kExprS128Const:
return SimdConstOp(opcode_length); return SimdConstOp(opcode_length);
default: { default: {
......
...@@ -436,6 +436,13 @@ class WasmGraphBuildingInterface { ...@@ -436,6 +436,13 @@ class WasmGraphBuildingInterface {
index.node, imm.offset, imm.alignment, decoder->position()); index.node, imm.offset, imm.alignment, decoder->position());
} }
// Builds the graph node for a SIMD load lane via the graph builder's LoadLane
// and stores it in |result|. Only the offset of |imm| is forwarded; the
// alignment is not passed on.
void LoadLane(FullDecoder* decoder, LoadType type, const Value& value,
const Value& index, const MemoryAccessImmediate<validate>& imm,
const uint8_t laneidx, Value* result) {
result->node = BUILD(LoadLane, type.mem_type(), value.node, index.node,
imm.offset, laneidx, decoder->position());
}
void StoreMem(FullDecoder* decoder, StoreType type, void StoreMem(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm, const Value& index, const MemoryAccessImmediate<validate>& imm, const Value& index,
const Value& value) { const Value& value) {
......
...@@ -309,6 +309,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -309,6 +309,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S128_OP(Load16x4U, "load16x4_u") CASE_S128_OP(Load16x4U, "load16x4_u")
CASE_S128_OP(Load32x2S, "load32x2_s") CASE_S128_OP(Load32x2S, "load32x2_s")
CASE_S128_OP(Load32x2U, "load32x2_u") CASE_S128_OP(Load32x2U, "load32x2_u")
CASE_S128_OP(Load8Lane, "load8_lane")
CASE_S128_OP(Load16Lane, "load16_lane")
CASE_S128_OP(Load32Lane, "load32_lane")
CASE_S128_OP(Load64Lane, "load64_lane")
CASE_I8x16_OP(RoundingAverageU, "avgr_u") CASE_I8x16_OP(RoundingAverageU, "avgr_u")
CASE_I16x8_OP(RoundingAverageU, "avgr_u") CASE_I16x8_OP(RoundingAverageU, "avgr_u")
......
...@@ -464,7 +464,11 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, ...@@ -464,7 +464,11 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
#define FOREACH_SIMD_POST_MVP_MEM_OPCODE(V) \ #define FOREACH_SIMD_POST_MVP_MEM_OPCODE(V) \
V(S128LoadMem32Zero, 0xfdfc, s_i) \ V(S128LoadMem32Zero, 0xfdfc, s_i) \
V(S128LoadMem64Zero, 0xfdfd, s_i) V(S128LoadMem64Zero, 0xfdfd, s_i) \
V(S128Load8Lane, 0xfd58, s_is) \
V(S128Load16Lane, 0xfd59, s_is) \
V(S128Load32Lane, 0xfd5a, s_is) \
V(S128Load64Lane, 0xfd5b, s_is)
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \ #define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \ V(I8x16Mul, 0xfd75, s_ss) \
...@@ -701,7 +705,8 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig, ...@@ -701,7 +705,8 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(s_si, kWasmS128, kWasmS128, kWasmI32) \ V(s_si, kWasmS128, kWasmS128, kWasmI32) \
V(i_s, kWasmI32, kWasmS128) \ V(i_s, kWasmI32, kWasmS128) \
V(v_is, kWasmStmt, kWasmI32, kWasmS128) \ V(v_is, kWasmStmt, kWasmI32, kWasmS128) \
V(s_sss, kWasmS128, kWasmS128, kWasmS128, kWasmS128) V(s_sss, kWasmS128, kWasmS128, kWasmS128, kWasmS128) \
V(s_is, kWasmS128, kWasmI32, kWasmS128)
#define FOREACH_PREFIX(V) \ #define FOREACH_PREFIX(V) \
V(Numeric, 0xfc) \ V(Numeric, 0xfc) \
......
...@@ -46,10 +46,21 @@ class Simd128 { ...@@ -46,10 +46,21 @@ class Simd128 {
const uint8_t* bytes() { return val_; } const uint8_t* bytes() { return val_; }
template <typename T>
inline T to();
private: private:
uint8_t val_[16] = {0}; uint8_t val_[16] = {0};
}; };
// Defines one explicit specialization of Simd128::to<sType>() per entry of
// FOREACH_SIMD_TYPE, each forwarding to the matching to_<name>() accessor, so
// templated code can select the lane view by type.
#define DECLARE_CAST(cType, sType, name, size) \
template <> \
inline sType Simd128::to() { \
return to_##name(); \
}
FOREACH_SIMD_TYPE(DECLARE_CAST)
#undef DECLARE_CAST
// Macro for defining WasmValue methods for different types. // Macro for defining WasmValue methods for different types.
// Elements: // Elements:
// - name (for to_<name>() method) // - name (for to_<name>() method)
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "src/base/bits.h" #include "src/base/bits.h"
#include "src/base/overflowing-math.h" #include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h" #include "src/codegen/assembler-inl.h"
#include "src/wasm/wasm-opcodes.h"
#include "test/cctest/cctest.h" #include "test/cctest/cctest.h"
#include "test/cctest/compiler/value-helper.h" #include "test/cctest/compiler/value-helper.h"
#include "test/cctest/wasm/wasm-run-utils.h" #include "test/cctest/wasm/wasm-run-utils.h"
...@@ -3562,6 +3563,87 @@ WASM_SIMD_TEST_NO_LOWERING(S128LoadMem64Zero) { ...@@ -3562,6 +3563,87 @@ WASM_SIMD_TEST_NO_LOWERING(S128LoadMem64Zero) {
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64
// TODO(v8:10975): Prototyping load lane and store lane.
// Exercises a load-lane opcode for lane type T.
//  - For every lane index: splat a constant into a vector, load a sentinel
//    from memory into that one lane, and check that only that lane changed.
//  - Then check that accesses running past the end of memory trap.
// Liftoff is skipped because load lane is not implemented there yet.
template <typename T>
void RunLoadLaneTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
                     WasmOpcode load_op, WasmOpcode splat_op) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  if (execution_tier == TestExecutionTier::kLiftoff) {
    // Not yet implemented.
    return;
  }

  WasmOpcode const_op =
      splat_op == kExprI64x2Splat ? kExprI64Const : kExprI32Const;

  constexpr int lanes_s = kSimd128Size / sizeof(T);
  constexpr int mem_index = 16;  // Load from mem index 16 (bytes).
  constexpr int splat_value = 33;

  for (int lane_index = 0; lane_index < lanes_s; lane_index++) {
    WasmRunner<int32_t> r(execution_tier, lower_simd);
    T* memory = r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));
    T* global = r.builder().AddGlobal<T>(kWasmS128);

    // Splat splat_value, then only load and replace a single lane with the
    // sentinel value.
    BUILD(r, WASM_I32V(mem_index), const_op, splat_value,
          WASM_SIMD_OP(splat_op), WASM_SIMD_OP(load_op), ZERO_ALIGNMENT,
          ZERO_OFFSET, lane_index, kExprGlobalSet, 0, WASM_ONE);

    // The sentinel is written at element lanes_s of T, which is where the
    // load at byte offset mem_index reads from (assuming kSimd128Size == 16).
    T sentinel = T{-1};
    r.builder().WriteMemory(&memory[lanes_s], sentinel);
    r.Call();

    // Only one lane is loaded, the rest of the lanes are unchanged.
    for (int i = 0; i < lanes_s; i++) {
      if (i == lane_index) {
        CHECK_EQ(sentinel, ReadLittleEndianValue<T>(&global[i]));
      } else {
        CHECK_EQ(T{splat_value}, ReadLittleEndianValue<T>(&global[i]));
      }
    }
  }

  // Test for OOB.
  {
    WasmRunner<int32_t, uint32_t> r(execution_tier, lower_simd);
    r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));
    r.builder().AddGlobal<T>(kWasmS128);

    BUILD(r, WASM_GET_LOCAL(0), const_op, splat_value, WASM_SIMD_OP(splat_op),
          WASM_SIMD_OP(load_op), ZERO_ALIGNMENT, ZERO_OFFSET, 0, kExprGlobalSet,
          0, WASM_ONE);

    // Load lane loads sizeof(T) bytes, so every start index in
    // [kWasmPageSize - (sizeof(T) - 1), kWasmPageSize] runs past the end of
    // the one-page memory. The upper bound is inclusive so that byte-sized
    // lanes (sizeof(T) == 1) are covered as well; with an exclusive bound the
    // loop would not execute at all for them.
    for (uint32_t index = kWasmPageSize - (sizeof(T) - 1);
         index <= kWasmPageSize; ++index) {
      CHECK_TRAP(r.Call(index));
    }
  }
}
// 8-bit lanes: load into each lane of a vector built with i8x16.splat.
WASM_SIMD_TEST_NO_LOWERING(S128Load8Lane) {
RunLoadLaneTest<int8_t>(execution_tier, lower_simd, kExprS128Load8Lane,
kExprI8x16Splat);
}
// 16-bit lanes: load into each lane of a vector built with i16x8.splat.
WASM_SIMD_TEST_NO_LOWERING(S128Load16Lane) {
RunLoadLaneTest<int16_t>(execution_tier, lower_simd, kExprS128Load16Lane,
kExprI16x8Splat);
}
// 32-bit lanes: load into each lane of a vector built with i32x4.splat.
WASM_SIMD_TEST_NO_LOWERING(S128Load32Lane) {
RunLoadLaneTest<int32_t>(execution_tier, lower_simd, kExprS128Load32Lane,
kExprI32x4Splat);
}
// 64-bit lanes: load into each lane of a vector built with i64x2.splat.
WASM_SIMD_TEST_NO_LOWERING(S128Load64Lane) {
RunLoadLaneTest<int64_t>(execution_tier, lower_simd, kExprS128Load64Lane,
kExprI64x2Splat);
}
#endif // V8_TARGET_ARCH_X64
#define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \ #define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \
WASM_SIMD_TEST(S##format##AnyTrue) { \ WASM_SIMD_TEST(S##format##AnyTrue) { \
FLAG_SCOPE(wasm_simd_post_mvp); \ FLAG_SCOPE(wasm_simd_post_mvp); \
......
...@@ -2675,6 +2675,22 @@ class WasmInterpreterInternals { ...@@ -2675,6 +2675,22 @@ class WasmInterpreterInternals {
return DoSimdLoadZeroExtend<int2, uint64_t>( return DoSimdLoadZeroExtend<int2, uint64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64); decoder, code, pc, len, MachineRepresentation::kWord64);
} }
case kExprS128Load8Lane: {
return DoSimdLoadLane<int16, int32_t, int8_t>(
decoder, code, pc, len, MachineRepresentation::kWord8);
}
case kExprS128Load16Lane: {
return DoSimdLoadLane<int8, int32_t, int16_t>(
decoder, code, pc, len, MachineRepresentation::kWord16);
}
case kExprS128Load32Lane: {
return DoSimdLoadLane<int4, int32_t, int32_t>(
decoder, code, pc, len, MachineRepresentation::kWord32);
}
case kExprS128Load64Lane: {
return DoSimdLoadLane<int2, int64_t, int64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
default: default:
return false; return false;
} }
...@@ -2735,6 +2751,24 @@ class WasmInterpreterInternals { ...@@ -2735,6 +2751,24 @@ class WasmInterpreterInternals {
return true; return true;
} }
// Interprets a SIMD load-lane instruction: pops the vector operand, performs
// a scalar load through ExecuteLoad, then writes the loaded value into the
// lane named by the trailing lane immediate and pushes the updated vector.
// Returns false if ExecuteLoad fails.
template <typename s_type, typename result_type, typename load_type>
bool DoSimdLoadLane(Decoder* decoder, InterpreterCode* code, pc_t pc,
                    int* const len, MachineRepresentation rep) {
  // The vector is on top of the stack and must come off before the load runs.
  s_type vector = Pop().to_s128().to<s_type>();

  // NOTE(review): ExecuteLoad is expected to leave the loaded scalar on the
  // stack and advance *len past the memory immediate — confirm.
  const bool load_succeeded = ExecuteLoad<result_type, load_type>(
      decoder, code, pc, len, rep, /*prefix_len=*/*len);
  if (!load_succeeded) {
    return false;
  }

  // The lane index immediate is read at pc + *len and then accounted for in
  // the instruction length.
  SimdLaneImmediate<Decoder::kNoValidation> lane_imm(decoder,
                                                     code->at(pc + *len));
  *len += lane_imm.length;

  const result_type lane_value = Pop().to<result_type>();
  vector.val[LANE(lane_imm.lane, vector)] = lane_value;
  Push(WasmValue(Simd128(vector)));
  return true;
}
// Check if our control stack (frames_) exceeds the limit. Trigger stack // Check if our control stack (frames_) exceeds the limit. Trigger stack
// overflow if it does, and unwinding the current frame. // overflow if it does, and unwinding the current frame.
// Returns true if execution can continue, false if the stack was fully // Returns true if execution can continue, false if the stack was fully
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment