Commit 673be63e authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Prototype load lane

Load lane loads a value from memory and replaces a single lane of a
SIMD value.

This implements only the load (no store lane yet) for x64 and the
interpreter.
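
For intuition, the 32-bit variant behaves like the following sketch
(the V128 type and helper here are illustrative only, not V8 code):

  #include <cstdint>
  #include <cstring>

  // Illustrative 4x32-bit vector; not a V8 type.
  struct V128 {
    uint32_t lane[4];
  };

  // v128.load32_lane: load 4 bytes from memory into lane `idx` of `v`;
  // every other lane passes through unchanged.
  V128 Load32Lane(const uint8_t* mem, V128 v, int idx) {
    std::memcpy(&v.lane[idx], mem, sizeof(uint32_t));
    return v;
  }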

Bug: v8:10975
Change-Id: I95d1b5e781ee9adaec23dda749e514f2485eda10
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2444578
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70456}
parent 2c38a477
......@@ -1412,6 +1412,10 @@ void InstructionSelector::VisitNode(Node* node) {
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitLoadTransform(node);
}
case IrOpcode::kLoadLane: {
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitLoadLane(node);
}
case IrOpcode::kPoisonedLoad: {
LoadRepresentation type = LoadRepresentationOf(node->op());
MarkAsRepresentation(type.representation(), node);
......@@ -2693,6 +2697,11 @@ void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
}
#endif // !V8_TARGET_ARCH_ARM64
// TODO(v8:10975): Prototyping load lane and store lane.
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
......@@ -3243,6 +3243,44 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64Pinsrb: {
// TODO(zhin): consolidate this opcode with the other usages, like
// ReplaceLane, by implementing support when this has no addressing mode.
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrb(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
case kX64Pinsrw: {
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrw(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
case kX64Pinsrd: {
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrd(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
case kX64Pinsrq: {
DCHECK(HasAddressingMode(instr));
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t offset = 0;
Operand mem = i.MemoryOperand(&offset);
__ Pinsrq(i.OutputSimd128Register(), i.InputSimd128Register(offset + 1),
mem, i.InputUint8(offset));
break;
}
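All four Pinsr cases follow the same shape: pinsr{b,w,d,q} takes a memory
source operand, so the load and the lane insert happen in a single
instruction. With SSE4.1 intrinsics, the 32-bit case computes the equivalent
of this sketch (the real instruction fuses the load rather than performing
it separately):

  #include <smmintrin.h>  // SSE4.1

  // Equivalent of `pinsrd xmm, [mem], 2`: replace lane 2 of dst_src with
  // the 32-bit value at mem; the other lanes are untouched.
  __m128i InsertLane2(__m128i dst_src, const int32_t* mem) {
    return _mm_insert_epi32(dst_src, *mem, 2);
  }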
case kX64I8x16SConvertI16x8: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
......
......@@ -281,6 +281,10 @@ namespace compiler {
V(X64I8x16ExtractLaneU) \
V(X64I8x16ExtractLaneS) \
V(X64I8x16ReplaceLane) \
V(X64Pinsrb) \
V(X64Pinsrw) \
V(X64Pinsrd) \
V(X64Pinsrq) \
V(X64I8x16SConvertI16x8) \
V(X64I8x16Neg) \
V(X64I8x16Shl) \
......
......@@ -253,6 +253,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I8x16ExtractLaneU:
case kX64I8x16ExtractLaneS:
case kX64I8x16ReplaceLane:
case kX64Pinsrb:
case kX64Pinsrw:
case kX64Pinsrd:
case kX64Pinsrq:
case kX64I8x16SConvertI16x8:
case kX64I8x16Neg:
case kX64I8x16Shl:
......
......@@ -5,8 +5,10 @@
#include <algorithm>
#include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/overflowing-math.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/roots/roots-inl.h"
......@@ -342,6 +344,44 @@ void InstructionSelector::VisitAbortCSAAssert(Node* node) {
Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), rdx));
}
void InstructionSelector::VisitLoadLane(Node* node) {
LoadLaneParameters params = LoadLaneParametersOf(node->op());
InstructionCode opcode = kArchNop;
if (params.rep == MachineType::Int8()) {
opcode = kX64Pinsrb;
} else if (params.rep == MachineType::Int16()) {
opcode = kX64Pinsrw;
} else if (params.rep == MachineType::Int32()) {
opcode = kX64Pinsrd;
} else if (params.rep == MachineType::Int64()) {
opcode = kX64Pinsrq;
} else {
UNREACHABLE();
}
X64OperandGenerator g(this);
InstructionOperand outputs[] = {g.DefineAsRegister(node)};
// GetEffectiveAddressMemoryOperand uses up to 3 inputs; the 4th is the lane
// index and the 5th is the value node.
InstructionOperand inputs[5];
size_t input_count = 0;
AddressingMode mode =
g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
opcode |= AddressingModeField::encode(mode);
inputs[input_count++] = g.UseImmediate(params.laneidx);
inputs[input_count++] = g.UseRegister(node->InputAt(2));
DCHECK_GE(5, input_count);
// x64 supports unaligned loads.
DCHECK_NE(params.kind, LoadKind::kUnaligned);
if (params.kind == LoadKind::kProtected) {
opcode |= MiscField::encode(kMemoryAccessProtected);
}
Emit(opcode, 1, outputs, input_count, inputs);
}
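For a simple base-plus-index addressing mode, the final operand layout seen
by the Emit call would be, illustratively:

  // inputs = { base, index, laneidx, value }   // input_count == 4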
void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
ArchOpcode opcode = kArchNop;
......@@ -658,7 +698,7 @@ bool TryMergeTruncateInt64ToInt32IntoLoad(InstructionSelector* selector,
case MachineRepresentation::kWord64:
case MachineRepresentation::kTaggedSigned:
case MachineRepresentation::kTagged:
case MachineRepresentation::kCompressed:  // Fall through.
opcode = kX64Movl;
break;
default:
......
......@@ -99,6 +99,25 @@ bool operator!=(LoadTransformParameters lhs, LoadTransformParameters rhs) {
return !(lhs == rhs);
}
size_t hash_value(LoadLaneParameters params) {
return base::hash_combine(params.kind, params.rep, params.laneidx);
}
std::ostream& operator<<(std::ostream& os, LoadLaneParameters params) {
return os << "(" << params.kind << " " << params.rep << " " << params.laneidx
<< ")";
}
LoadLaneParameters const& LoadLaneParametersOf(Operator const* op) {
DCHECK_EQ(IrOpcode::kLoadLane, op->opcode());
return OpParameter<LoadLaneParameters>(op);
}
bool operator==(LoadLaneParameters lhs, LoadLaneParameters rhs) {
return lhs.kind == rhs.kind && lhs.rep == rhs.rep &&
lhs.laneidx == rhs.laneidx;
}
LoadRepresentation LoadRepresentationOf(Operator const* op) {
DCHECK(IrOpcode::kLoad == op->opcode() ||
IrOpcode::kProtectedLoad == op->opcode() ||
......@@ -602,6 +621,15 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I16x8, 8) \
V(I8x16, 16)
#define SIMD_I64x2_LANES(V) V(0) V(1)
#define SIMD_I32x4_LANES(V) SIMD_I64x2_LANES(V) V(2) V(3)
#define SIMD_I16x8_LANES(V) SIMD_I32x4_LANES(V) V(4) V(5) V(6) V(7)
#define SIMD_I8x16_LANES(V) \
SIMD_I16x8_LANES(V) V(8) V(9) V(10) V(11) V(12) V(13) V(14) V(15)
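Each lane-list macro expands to one V(...) invocation per lane index, e.g.:

  // SIMD_I16x8_LANES(V) expands to:
  //   V(0) V(1) V(2) V(3) V(4) V(5) V(6) V(7)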
#define STACK_SLOT_CACHED_SIZES_ALIGNMENTS_LIST(V) \
V(4, 0) V(8, 0) V(16, 0) V(4, 4) V(8, 8) V(16, 16)
......@@ -752,6 +780,19 @@ struct LoadTransformOperator : public Operator1<LoadTransformParameters> {
LoadTransformParameters{kind, type}) {}
};
template <LoadKind kind, MachineRepresentation rep, MachineSemantic sem,
uint8_t laneidx>
struct LoadLaneOperator : public Operator1<LoadLaneParameters> {
LoadLaneOperator()
: Operator1(
IrOpcode::kLoadLane,
kind == LoadKind::kProtected
? Operator::kNoDeopt | Operator::kNoThrow
: Operator::kEliminatable,
"LoadLane", 3, 1, 1, 1, 1, 0,
LoadLaneParameters{kind, LoadRepresentation(rep, sem), laneidx}) {}
};
template <MachineRepresentation rep, WriteBarrierKind write_barrier_kind>
struct StoreOperator : public Operator1<StoreRepresentation> {
StoreOperator()
......@@ -1123,6 +1164,40 @@ const Operator* MachineOperatorBuilder::LoadTransform(
UNREACHABLE();
}
const Operator* MachineOperatorBuilder::LoadLane(LoadKind kind,
LoadRepresentation rep,
uint8_t laneidx) {
#define LOAD_LANE_KIND(TYPE, KIND, LANEIDX) \
if (kind == LoadKind::k##KIND && rep == MachineType::TYPE() && \
laneidx == LANEIDX) { \
return GetCachedOperator<LoadLaneOperator< \
LoadKind::k##KIND, MachineType::TYPE().representation(), \
MachineType::TYPE().semantic(), LANEIDX>>(); \
}
#define LOAD_LANE_T(T, LANE) \
LOAD_LANE_KIND(T, Normal, LANE) \
LOAD_LANE_KIND(T, Unaligned, LANE) \
LOAD_LANE_KIND(T, Protected, LANE)
#define LOAD_LANE_INT8(LANE) LOAD_LANE_T(Int8, LANE)
#define LOAD_LANE_INT16(LANE) LOAD_LANE_T(Int16, LANE)
#define LOAD_LANE_INT32(LANE) LOAD_LANE_T(Int32, LANE)
#define LOAD_LANE_INT64(LANE) LOAD_LANE_T(Int64, LANE)
// Semicolons are unnecessary, but they help formatting.
SIMD_I8x16_LANES(LOAD_LANE_INT8);
SIMD_I16x8_LANES(LOAD_LANE_INT16);
SIMD_I32x4_LANES(LOAD_LANE_INT32);
SIMD_I64x2_LANES(LOAD_LANE_INT64);
#undef LOAD_LANE_INT8
#undef LOAD_LANE_INT16
#undef LOAD_LANE_INT32
#undef LOAD_LANE_INT64
#undef LOAD_LANE_KIND
UNREACHABLE();
}
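To see what the nested macros generate, here is one expanded arm, for Int32
with the Protected kind and lane 2 (a manual expansion, not code from the
commit):

  if (kind == LoadKind::kProtected && rep == MachineType::Int32() &&
      laneidx == 2) {
    return GetCachedOperator<LoadLaneOperator<
        LoadKind::kProtected, MachineType::Int32().representation(),
        MachineType::Int32().semantic(), 2>>();
  }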
const Operator* MachineOperatorBuilder::StackSlot(int size, int alignment) {
DCHECK_LE(0, size);
DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || alignment == 16);
......
......@@ -91,6 +91,17 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
V8_EXPORT_PRIVATE LoadTransformParameters const& LoadTransformParametersOf(
Operator const*) V8_WARN_UNUSED_RESULT;
struct LoadLaneParameters {
LoadKind kind;
LoadRepresentation rep;
uint8_t laneidx;
};
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, LoadLaneParameters);
V8_EXPORT_PRIVATE LoadLaneParameters const& LoadLaneParametersOf(
Operator const*) V8_WARN_UNUSED_RESULT;
// A Store needs a MachineType and a WriteBarrierKind in order to emit the
// correct write barrier.
class StoreRepresentation final {
......@@ -780,6 +791,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* LoadTransform(LoadKind kind, LoadTransformation transform);
// SIMD load: replace a specified lane with [base + index].
const Operator* LoadLane(LoadKind kind, LoadRepresentation rep,
uint8_t laneidx);
// store [base + index], value
const Operator* Store(StoreRepresentation rep);
const Operator* ProtectedStore(MachineRepresentation rep);
......
......@@ -956,7 +956,8 @@
V(V16x8AllTrue) \
V(V8x16AnyTrue) \
V(V8x16AllTrue) \
V(LoadTransform)
V(LoadTransform) \
V(LoadLane)
#define VALUE_OP_LIST(V) \
COMMON_OP_LIST(V) \
......
......@@ -3887,6 +3887,7 @@ LoadTransformation GetLoadTransformation(
} else if (memtype == MachineType::Int64()) {
return LoadTransformation::kS128LoadMem64Zero;
}
break;
}
}
UNREACHABLE();
......@@ -3983,6 +3984,33 @@ Node* WasmGraphBuilder::LoadTransformBigEndian(
}
#endif
Node* WasmGraphBuilder::LoadLane(MachineType memtype, Node* value, Node* index,
uint32_t offset, uint8_t laneidx,
wasm::WasmCodePosition position) {
has_simd_ = true;
Node* load;
uint8_t access_size = memtype.MemSize();
index =
BoundsCheckMem(access_size, index, offset, position, kCanOmitBoundsCheck);
LoadKind load_kind = GetLoadKind(mcgraph(), memtype, use_trap_handler());
load = SetEffect(graph()->NewNode(
mcgraph()->machine()->LoadLane(load_kind, memtype, laneidx),
MemBuffer(offset), index, value, effect(), control()));
if (load_kind == LoadKind::kProtected) {
SetSourcePosition(load, position);
}
if (FLAG_trace_wasm_memory) {
TraceMemoryOperation(false, memtype.representation(), index, offset,
position);
}
return load;
}
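The three value inputs passed to NewNode (the memory base from
MemBuffer(offset), the index, and the v128 value to patch), together with
the effect and control inputs, match the operator shape declared in
machine-operator.cc:

  // graph()->NewNode(LoadLane(...), base, index, value, effect, control)
  // LoadLaneOperator declares: 3 value in, 1 effect in, 1 control in,
  //                            1 value out, 1 effect out, 0 control out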
Node* WasmGraphBuilder::LoadTransform(wasm::ValueType type, MachineType memtype,
wasm::LoadTransformationKind transform,
Node* index, uint64_t offset,
......
......@@ -320,6 +320,8 @@ class WasmGraphBuilder {
wasm::LoadTransformationKind transform, Node* index,
uint64_t offset, uint32_t alignment,
wasm::WasmCodePosition position);
Node* LoadLane(MachineType memtype, Node* value, Node* index, uint32_t offset,
uint8_t laneidx, wasm::WasmCodePosition position);
Node* StoreMem(MachineRepresentation mem_rep, Node* index, uint64_t offset,
uint32_t alignment, Node* val, wasm::WasmCodePosition position,
wasm::ValueType type);
......
......@@ -2331,6 +2331,12 @@ class LiftoffCompiler {
}
}
void LoadLane(FullDecoder* decoder, LoadType type, const Value& value,
const Value& index, const MemoryAccessImmediate<validate>& imm,
const uint8_t laneidx, Value* result) {
unsupported(decoder, kSimd, "simd load lane");
}
void StoreMem(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm,
const Value& index_val, const Value& value_val) {
......
......@@ -978,6 +978,9 @@ struct ControlBase : public PcForErrors<validate> {
F(LoadTransform, LoadType type, LoadTransformationKind transform, \
const MemoryAccessImmediate<validate>& imm, const Value& index, \
Value* result) \
F(LoadLane, LoadType type, const Value& value, const Value& index, \
const MemoryAccessImmediate<validate>& imm, const uint8_t laneidx, \
Value* result) \
F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \
const Value& index, const Value& value) \
F(CurrentMemoryPages, Value* result) \
......@@ -1733,13 +1736,27 @@ class WasmDecoder : public Decoder {
return 2 + length;
#define DECLARE_OPCODE_CASE(name, opcode, sig) case kExpr##name:
FOREACH_SIMD_MEM_OPCODE(DECLARE_OPCODE_CASE)
#undef DECLARE_OPCODE_CASE
{
MemoryAccessImmediate<validate> imm(decoder, pc + length + 1,
UINT32_MAX);
return 1 + length + imm.length;
}
case kExprS128LoadMem32Zero:
case kExprS128LoadMem64Zero: {
MemoryAccessImmediate<validate> imm(decoder, pc + length + 1,
UINT32_MAX);
return 1 + length + imm.length;
}
case kExprS128Load8Lane:
case kExprS128Load16Lane:
case kExprS128Load32Lane:
case kExprS128Load64Lane: {
MemoryAccessImmediate<validate> imm(decoder, pc + length + 1,
UINT32_MAX);
// 1 more byte for lane index immediate.
return 1 + length + imm.length + 1;
}
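Concretely, the layout for one of the new opcodes looks like this byte
sketch (opcode values from the table in wasm-opcodes.h below; the memarg is
alignment log2 followed by offset, both LEB128-encoded):

  // s128.load32_lane, alignment log2 = 2, offset 16, lane 1:
  //   0xfd 0x5a    prefix + opcode
  //   0x02 0x10    memarg: alignment, offset
  //   0x01         lane index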
// Shuffles require a byte per lane, or 16 immediate bytes.
case kExprS128Const:
case kExprI8x16Shuffle:
......@@ -3331,6 +3348,21 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return opcode_length + imm.length;
}
int DecodeLoadLane(LoadType type, uint32_t opcode_length) {
if (!CheckHasMemory()) return 0;
MemoryAccessImmediate<validate> mem_imm(this, this->pc_ + opcode_length,
type.size_log_2());
SimdLaneImmediate<validate> lane_imm(
this, this->pc_ + opcode_length + mem_imm.length);
Value v128 = Pop(1, kWasmS128);
Value index = Pop(0, kWasmI32);
Value* result = Push(kWasmS128);
CALL_INTERFACE_IF_REACHABLE(LoadLane, type, v128, index, mem_imm,
lane_imm.lane, result);
return opcode_length + mem_imm.length + lane_imm.length;
}
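In stack terms the decode matches the new s_is signature: the v128 value is
on top, the i32 index below it, and a single s128 result is pushed:

  // [... index:i32 value:s128] -> [... result:s128]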
int DecodeStoreMem(StoreType store, int prefix_len = 1) {
if (!CheckHasMemory()) return 0;
MemoryAccessImmediate<validate> imm(this, this->pc_ + prefix_len,
......@@ -3562,6 +3594,18 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return DecodeLoadTransformMem(LoadType::kI64Load32U,
LoadTransformationKind::kExtend,
opcode_length);
case kExprS128Load8Lane: {
return DecodeLoadLane(LoadType::kI32Load8S, opcode_length);
}
case kExprS128Load16Lane: {
return DecodeLoadLane(LoadType::kI32Load16S, opcode_length);
}
case kExprS128Load32Lane: {
return DecodeLoadLane(LoadType::kI32Load, opcode_length);
}
case kExprS128Load64Lane: {
return DecodeLoadLane(LoadType::kI64Load, opcode_length);
}
case kExprS128Const:
return SimdConstOp(opcode_length);
default: {
......
......@@ -436,6 +436,13 @@ class WasmGraphBuildingInterface {
index.node, imm.offset, imm.alignment, decoder->position());
}
void LoadLane(FullDecoder* decoder, LoadType type, const Value& value,
const Value& index, const MemoryAccessImmediate<validate>& imm,
const uint8_t laneidx, Value* result) {
result->node = BUILD(LoadLane, type.mem_type(), value.node, index.node,
imm.offset, laneidx, decoder->position());
}
void StoreMem(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm, const Value& index,
const Value& value) {
......
......@@ -309,6 +309,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S128_OP(Load16x4U, "load16x4_u")
CASE_S128_OP(Load32x2S, "load32x2_s")
CASE_S128_OP(Load32x2U, "load32x2_u")
CASE_S128_OP(Load8Lane, "load8_lane")
CASE_S128_OP(Load16Lane, "load16_lane")
CASE_S128_OP(Load32Lane, "load32_lane")
CASE_S128_OP(Load64Lane, "load64_lane")
CASE_I8x16_OP(RoundingAverageU, "avgr_u")
CASE_I16x8_OP(RoundingAverageU, "avgr_u")
......
......@@ -464,7 +464,11 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
#define FOREACH_SIMD_POST_MVP_MEM_OPCODE(V) \
V(S128LoadMem32Zero, 0xfdfc, s_i) \
V(S128LoadMem64Zero, 0xfdfd, s_i) \
V(S128Load8Lane, 0xfd58, s_is) \
V(S128Load16Lane, 0xfd59, s_is) \
V(S128Load32Lane, 0xfd5a, s_is) \
V(S128Load64Lane, 0xfd5b, s_is)
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \
......@@ -691,17 +695,18 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(i_ci, kWasmI32, kWasmFuncRef, kWasmI32) \
V(i_qq, kWasmI32, kWasmEqRef, kWasmEqRef)
#define FOREACH_SIMD_SIGNATURE(V) \
V(s_s, kWasmS128, kWasmS128) \
V(s_f, kWasmS128, kWasmF32) \
V(s_d, kWasmS128, kWasmF64) \
V(s_ss, kWasmS128, kWasmS128, kWasmS128) \
V(s_i, kWasmS128, kWasmI32) \
V(s_l, kWasmS128, kWasmI64) \
V(s_si, kWasmS128, kWasmS128, kWasmI32) \
V(i_s, kWasmI32, kWasmS128) \
V(v_is, kWasmStmt, kWasmI32, kWasmS128) \
V(s_sss, kWasmS128, kWasmS128, kWasmS128, kWasmS128) \
V(s_is, kWasmS128, kWasmI32, kWasmS128)
#define FOREACH_PREFIX(V) \
V(Numeric, 0xfc) \
......
......@@ -46,10 +46,21 @@ class Simd128 {
const uint8_t* bytes() { return val_; }
template <typename T>
inline T to();
private:
uint8_t val_[16] = {0};
};
#define DECLARE_CAST(cType, sType, name, size) \
template <> \
inline sType Simd128::to() { \
return to_##name(); \
}
FOREACH_SIMD_TYPE(DECLARE_CAST)
#undef DECLARE_CAST
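A usage sketch, assuming V8's interpreter vector typedefs are in scope
(int4 is the 4x32-bit representation, so FOREACH_SIMD_TYPE wires to<int4>()
to to_i32x4()):

  Simd128 s;              // zero-initialized; val_ defaults to all zeros
  int4 v = s.to<int4>();  // dispatches to to_i32x4() via the cast above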
// Macro for defining WasmValue methods for different types.
// Elements:
// - name (for to_<name>() method)
......
......@@ -8,6 +8,7 @@
#include "src/base/bits.h"
#include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h"
#include "src/wasm/wasm-opcodes.h"
#include "test/cctest/cctest.h"
#include "test/cctest/compiler/value-helper.h"
#include "test/cctest/wasm/wasm-run-utils.h"
......@@ -3562,6 +3563,87 @@ WASM_SIMD_TEST_NO_LOWERING(S128LoadMem64Zero) {
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64
// TODO(v8:10975): Prototyping load lane and store lane.
template <typename T>
void RunLoadLaneTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode load_op, WasmOpcode splat_op) {
FLAG_SCOPE(wasm_simd_post_mvp);
if (execution_tier == TestExecutionTier::kLiftoff) {
// Not yet implemented.
return;
}
WasmOpcode const_op =
splat_op == kExprI64x2Splat ? kExprI64Const : kExprI32Const;
constexpr int lanes_s = kSimd128Size / sizeof(T);
constexpr int mem_index = 16; // Load from mem index 16 (bytes).
constexpr int splat_value = 33;
for (int lane_index = 0; lane_index < lanes_s; lane_index++) {
WasmRunner<int32_t> r(execution_tier, lower_simd);
T* memory = r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));
T* global = r.builder().AddGlobal<T>(kWasmS128);
// Splat splat_value, then only load and replace a single lane with the
// sentinel value.
BUILD(r, WASM_I32V(mem_index), const_op, splat_value,
WASM_SIMD_OP(splat_op), WASM_SIMD_OP(load_op), ZERO_ALIGNMENT,
ZERO_OFFSET, lane_index, kExprGlobalSet, 0, WASM_ONE);
T sentinel = T{-1};
r.builder().WriteMemory(&memory[lanes_s], sentinel);
r.Call();
// Only one lane is loaded, the rest of the lanes are unchanged.
for (int i = 0; i < lanes_s; i++) {
if (i == lane_index) {
CHECK_EQ(sentinel, ReadLittleEndianValue<T>(&global[i]));
} else {
CHECK_EQ(T{splat_value}, ReadLittleEndianValue<T>(&global[i]));
}
}
}
// Test for OOB.
{
WasmRunner<int32_t, uint32_t> r(execution_tier, lower_simd);
r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));
r.builder().AddGlobal<T>(kWasmS128);
BUILD(r, WASM_GET_LOCAL(0), const_op, splat_value, WASM_SIMD_OP(splat_op),
WASM_SIMD_OP(load_op), ZERO_ALIGNMENT, ZERO_OFFSET, 0, kExprGlobalSet,
0, WASM_ONE);
// Load lane loads sizeof(T) bytes.
for (uint32_t index = kWasmPageSize - (sizeof(T) - 1);
index < kWasmPageSize; ++index) {
CHECK_TRAP(r.Call(index));
}
}
}
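For the 32-bit case with lane_index == 1, the body that BUILD assembles
corresponds roughly to this instruction sequence (a sketch):

  //   i32.const 16                          (mem_index)
  //   i32.const 33                          (splat_value)
  //   i32x4.splat
  //   v128.load32_lane align=0 offset=0 1
  //   global.set 0
  //   i32.const 1                           (WASM_ONE)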
WASM_SIMD_TEST_NO_LOWERING(S128Load8Lane) {
RunLoadLaneTest<int8_t>(execution_tier, lower_simd, kExprS128Load8Lane,
kExprI8x16Splat);
}
WASM_SIMD_TEST_NO_LOWERING(S128Load16Lane) {
RunLoadLaneTest<int16_t>(execution_tier, lower_simd, kExprS128Load16Lane,
kExprI16x8Splat);
}
WASM_SIMD_TEST_NO_LOWERING(S128Load32Lane) {
RunLoadLaneTest<int32_t>(execution_tier, lower_simd, kExprS128Load32Lane,
kExprI32x4Splat);
}
WASM_SIMD_TEST_NO_LOWERING(S128Load64Lane) {
RunLoadLaneTest<int64_t>(execution_tier, lower_simd, kExprS128Load64Lane,
kExprI64x2Splat);
}
#endif // V8_TARGET_ARCH_X64
#define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \
WASM_SIMD_TEST(S##format##AnyTrue) { \
FLAG_SCOPE(wasm_simd_post_mvp); \
......
......@@ -2675,6 +2675,22 @@ class WasmInterpreterInternals {
return DoSimdLoadZeroExtend<int2, uint64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
case kExprS128Load8Lane: {
return DoSimdLoadLane<int16, int32_t, int8_t>(
decoder, code, pc, len, MachineRepresentation::kWord8);
}
case kExprS128Load16Lane: {
return DoSimdLoadLane<int8, int32_t, int16_t>(
decoder, code, pc, len, MachineRepresentation::kWord16);
}
case kExprS128Load32Lane: {
return DoSimdLoadLane<int4, int32_t, int32_t>(
decoder, code, pc, len, MachineRepresentation::kWord32);
}
case kExprS128Load64Lane: {
return DoSimdLoadLane<int2, int64_t, int64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
default:
return false;
}
......@@ -2735,6 +2751,24 @@ class WasmInterpreterInternals {
return true;
}
template <typename s_type, typename result_type, typename load_type>
bool DoSimdLoadLane(Decoder* decoder, InterpreterCode* code, pc_t pc,
int* const len, MachineRepresentation rep) {
s_type value = Pop().to_s128().to<s_type>();
if (!ExecuteLoad<result_type, load_type>(decoder, code, pc, len, rep,
/*prefix_len=*/*len)) {
return false;
}
SimdLaneImmediate<Decoder::kNoValidation> lane_imm(decoder,
code->at(pc + *len));
*len += lane_imm.length;
result_type loaded = Pop().to<result_type>();
value.val[LANE(lane_imm.lane, value)] = loaded;
Push(WasmValue(Simd128(value)));
return true;
}
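Reading the template arguments for one instantiation (kExprS128Load8Lane
above uses DoSimdLoadLane<int16, int32_t, int8_t>):

  //   int16    the interpreter's sixteen-lane i8 vector representation
  //   int32_t  the widened type ExecuteLoad pushes on the operand stack
  //   int8_t   the type actually loaded from memory (one byte)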
// Check if our control stack (frames_) exceeds the limit. Trigger stack
// overflow if it does, and unwind the current frame.
// Returns true if execution can continue, false if the stack was fully
......