Commit 208578dc authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Prototype store lane

Store lane extracts a single lane from a simd value and stores it to
memory.

This implements store lane for x64 and the interpreter.

Bug: v8:10975
Change-Id: Ida79a03e0fd2bc18f2c06687311936b3cb550ed5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2473383
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70586}
parent 07b3e980
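
For intuition, here is a minimal scalar sketch of the store-lane semantics introduced below (illustrative only, not V8 code; StoreLaneSketch is a made-up name): exactly one lane, selected by an immediate index, is written to memory, and no other bytes are stored.

    #include <cstdint>
    #include <cstring>

    // Store lane `laneidx` of a 128-bit vector, viewed as lanes of type T,
    // to `mem`. Only sizeof(T) bytes are written; the rest of the vector
    // stays in the register.
    template <typename T>
    void StoreLaneSketch(uint8_t* mem, const uint8_t (&v128)[16],
                         uint8_t laneidx) {
      std::memcpy(mem, v128 + laneidx * sizeof(T), sizeof(T));
    }

With T = uint32_t and laneidx in [0, 3], this corresponds to v128.store32_lane.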
......@@ -1424,6 +1424,10 @@ void InstructionSelector::VisitNode(Node* node) {
return VisitStore(node);
case IrOpcode::kProtectedStore:
return VisitProtectedStore(node);
case IrOpcode::kStoreLane: {
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitStoreLane(node);
}
case IrOpcode::kWord32And:
return MarkAsWord32(node), VisitWord32And(node);
case IrOpcode::kWord32Or:
......@@ -2699,6 +2703,7 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_X64
// TODO(v8:10975): Prototyping load lane and store lane.
void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10997) Prototype i64x2.bitmask.
void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
......
......@@ -3235,6 +3235,28 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
break;
}
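// When Pextrb/Pextrw are given an addressing mode and no output (below),
// they act as stores: SSE4.1 PEXTRB/PEXTRW can write the extracted lane
// directly to an 8-/16-bit memory destination.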
case kX64Pextrb: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
DCHECK(HasAddressingMode(instr));
DCHECK(!instr->HasOutput());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Pextrb(operand, i.InputSimd128Register(index),
i.InputUint8(index + 1));
break;
}
case kX64Pextrw: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
DCHECK(HasAddressingMode(instr));
DCHECK(!instr->HasOutput());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
__ Pextrw(operand, i.InputSimd128Register(index),
i.InputUint8(index + 1));
break;
}
case kX64I8x16ExtractLaneS: {
Register dst = i.OutputRegister();
__ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
......@@ -3675,6 +3697,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movq(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
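// Store a single 32-bit lane to memory: lane 0 is a plain MOVSS store of
// the low dword; other lanes use EXTRACTPS, which accepts a memory
// destination.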
case kX64S128Store32Lane: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
uint8_t lane = i.InputUint8(index + 1);
if (lane == 0) {
__ Movss(operand, i.InputSimd128Register(index));
} else {
DCHECK_GE(3, lane);
__ Extractps(operand, i.InputSimd128Register(index), lane);
}
break;
}
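// There is no extract-to-memory form for 64-bit lanes; MOVLPS stores the
// low qword (lane 0) and MOVHPS the high qword (lane 1).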
case kX64S128Store64Lane: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
size_t index = 0;
Operand operand = i.MemoryOperand(&index);
uint8_t lane = i.InputUint8(index + 1);
if (lane == 0) {
__ Movlps(operand, i.InputSimd128Register(index));
} else {
DCHECK_EQ(1, lane);
__ Movhps(operand, i.InputSimd128Register(index));
}
break;
}
case kX64S32x4Swizzle: {
DCHECK_EQ(2, instr->InputCount());
ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
......
......@@ -281,6 +281,8 @@ namespace compiler {
V(X64Pinsrw) \
V(X64Pinsrd) \
V(X64Pinsrq) \
V(X64Pextrb) \
V(X64Pextrw) \
V(X64I8x16SConvertI16x8) \
V(X64I8x16Neg) \
V(X64I8x16Shl) \
......@@ -330,6 +332,8 @@ namespace compiler {
V(X64S128Load32x2U) \
V(X64S128LoadMem32Zero) \
V(X64S128LoadMem64Zero) \
V(X64S128Store32Lane) \
V(X64S128Store64Lane) \
V(X64S32x4Swizzle) \
V(X64S32x4Shuffle) \
V(X64S16x8Blend) \
......
......@@ -348,8 +348,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Movb:
case kX64Movw:
case kX64S128Store32Lane:
case kX64S128Store64Lane:
return kHasSideEffect;
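// Pextrb/Pextrw are grouped with Movl: without an output they write the
// extracted lane to memory, which is a side effect.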
case kX64Pextrb:
case kX64Pextrw:
case kX64Movl:
if (instr->HasOutput()) {
DCHECK_LE(1, instr->InputCount());
......
......@@ -7,6 +7,7 @@
#include "src/base/iterator.h"
#include "src/base/logging.h"
#include "src/base/overflowing-math.h"
#include "src/codegen/machine-type.h"
#include "src/compiler/backend/instruction-selector-impl.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-matchers.h"
......@@ -538,6 +539,40 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
// Architecture supports unaligned access, therefore VisitStore is used instead
void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitStoreLane(Node* node) {
X64OperandGenerator g(this);
StoreLaneParameters params = StoreLaneParametersOf(node->op());
InstructionCode opcode = kArchNop;
if (params.rep == MachineRepresentation::kWord8) {
opcode = kX64Pextrb;
} else if (params.rep == MachineRepresentation::kWord16) {
opcode = kX64Pextrw;
} else if (params.rep == MachineRepresentation::kWord32) {
opcode = kX64S128Store32Lane;
} else if (params.rep == MachineRepresentation::kWord64) {
opcode = kX64S128Store64Lane;
} else {
UNREACHABLE();
}
InstructionOperand inputs[4];
size_t input_count = 0;
AddressingMode addressing_mode =
g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
opcode |= AddressingModeField::encode(addressing_mode);
if (params.kind == LoadKind::kProtected) {
opcode |= MiscField::encode(kMemoryAccessProtected);
}
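// Operand layout: memory operands first, then the value to store and the
// lane index; a store lane emits no outputs.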
InstructionOperand value_operand = g.UseRegister(node->InputAt(2));
inputs[input_count++] = value_operand;
inputs[input_count++] = g.UseImmediate(params.laneidx);
DCHECK_GE(4, input_count);
Emit(opcode, 0, nullptr, input_count, inputs);
}
// Shared routine for multiple binary operations.
static void VisitBinop(InstructionSelector* selector, Node* node,
InstructionCode opcode, FlagsContinuation* cont) {
......
......@@ -141,6 +141,25 @@ UnalignedStoreRepresentation const& UnalignedStoreRepresentationOf(
return OpParameter<UnalignedStoreRepresentation>(op);
}
size_t hash_value(StoreLaneParameters params) {
return base::hash_combine(params.kind, params.rep, params.laneidx);
}
std::ostream& operator<<(std::ostream& os, StoreLaneParameters params) {
return os << "(" << params.kind << " " << params.rep << " " << params.laneidx
<< ")";
}
StoreLaneParameters const& StoreLaneParametersOf(Operator const* op) {
DCHECK_EQ(IrOpcode::kStoreLane, op->opcode());
return OpParameter<StoreLaneParameters>(op);
}
bool operator==(StoreLaneParameters lhs, StoreLaneParameters rhs) {
return lhs.kind == rhs.kind && lhs.rep == rhs.rep &&
lhs.laneidx == rhs.laneidx;
}
bool operator==(StackSlotRepresentation lhs, StackSlotRepresentation rhs) {
return lhs.size() == rhs.size() && lhs.alignment() == rhs.alignment();
}
......@@ -821,6 +840,15 @@ struct ProtectedStoreOperator : public Operator1<StoreRepresentation> {
StoreRepresentation(rep, kNoWriteBarrier)) {}
};
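// StoreLane takes 3 value inputs (base, index, value), 1 effect input and
// 1 control input, and produces only an effect output.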
template <LoadKind kind, MachineRepresentation rep, uint8_t laneidx>
struct StoreLaneOperator : public Operator1<StoreLaneParameters> {
StoreLaneOperator()
: Operator1(IrOpcode::kStoreLane,
Operator::kNoDeopt | Operator::kNoRead | Operator::kNoThrow,
"StoreLane", 3, 1, 1, 0, 1, 0,
StoreLaneParameters{kind, rep, laneidx}) {}
};
template <MachineRepresentation rep, MachineSemantic sem>
struct Word32AtomicLoadOperator : public Operator1<LoadRepresentation> {
Word32AtomicLoadOperator()
......@@ -1200,6 +1228,39 @@ const Operator* MachineOperatorBuilder::LoadLane(LoadKind kind,
UNREACHABLE();
}
const Operator* MachineOperatorBuilder::StoreLane(LoadKind kind,
MachineRepresentation rep,
uint8_t laneidx) {
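// Return the cached operator specialized on (kind, rep, laneidx); the
// macros below enumerate every supported combination.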
#define STORE_LANE_KIND(REP, KIND, LANEIDX) \
if (kind == LoadKind::k##KIND && rep == MachineRepresentation::REP && \
laneidx == LANEIDX) { \
return GetCachedOperator<StoreLaneOperator< \
LoadKind::k##KIND, MachineRepresentation::REP, LANEIDX>>(); \
}
#define STORE_LANE_T(T, LANE) \
STORE_LANE_KIND(T, Normal, LANE) \
STORE_LANE_KIND(T, Unaligned, LANE) \
STORE_LANE_KIND(T, Protected, LANE)
#define STORE_LANE_WORD8(LANE) STORE_LANE_T(kWord8, LANE)
#define STORE_LANE_WORD16(LANE) STORE_LANE_T(kWord16, LANE)
#define STORE_LANE_WORD32(LANE) STORE_LANE_T(kWord32, LANE)
#define STORE_LANE_WORD64(LANE) STORE_LANE_T(kWord64, LANE)
// Semicolons are unnecessary, but help formatting.
SIMD_I8x16_LANES(STORE_LANE_WORD8);
SIMD_I16x8_LANES(STORE_LANE_WORD16);
SIMD_I32x4_LANES(STORE_LANE_WORD32);
SIMD_I64x2_LANES(STORE_LANE_WORD64);
#undef STORE_LANE_WORD8
#undef STORE_LANE_WORD16
#undef STORE_LANE_WORD32
#undef STORE_LANE_WORD64
#undef STORE_LANE_KIND
UNREACHABLE();
}
const Operator* MachineOperatorBuilder::StackSlot(int size, int alignment) {
DCHECK_LE(0, size);
DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || alignment == 16);
......
......@@ -49,6 +49,7 @@ using LoadRepresentation = MachineType;
V8_EXPORT_PRIVATE LoadRepresentation LoadRepresentationOf(Operator const*)
V8_WARN_UNUSED_RESULT;
// TODO(zhin): This is used by StoreLane too, rename this.
enum class LoadKind {
kNormal,
kUnaligned,
......@@ -135,6 +136,17 @@ using UnalignedStoreRepresentation = MachineRepresentation;
UnalignedStoreRepresentation const& UnalignedStoreRepresentationOf(
Operator const*) V8_WARN_UNUSED_RESULT;
struct StoreLaneParameters {
LoadKind kind;
MachineRepresentation rep;
uint8_t laneidx;
};
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, StoreLaneParameters);
V8_EXPORT_PRIVATE StoreLaneParameters const& StoreLaneParametersOf(
Operator const*) V8_WARN_UNUSED_RESULT;
class StackSlotRepresentation final {
public:
StackSlotRepresentation(int size, int alignment)
......@@ -801,6 +813,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* Store(StoreRepresentation rep);
const Operator* ProtectedStore(MachineRepresentation rep);
// SIMD store: store a specified lane of value into [base + index].
const Operator* StoreLane(LoadKind kind, MachineRepresentation rep,
uint8_t laneidx);
// unaligned load [base + index]
const Operator* UnalignedLoad(LoadRepresentation rep);
......
......@@ -959,7 +959,8 @@
V(V8x16AnyTrue) \
V(V8x16AllTrue) \
V(LoadTransform) \
V(LoadLane)
V(LoadLane) \
V(StoreLane)
#define VALUE_OP_LIST(V) \
COMMON_OP_LIST(V) \
......
......@@ -16,6 +16,7 @@
#include "src/codegen/code-factory.h"
#include "src/codegen/compiler.h"
#include "src/codegen/interface-descriptors.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/instruction-selector.h"
......@@ -4110,6 +4111,37 @@ Node* WasmGraphBuilder::LoadMem(wasm::ValueType type, MachineType memtype,
return load;
}
Node* WasmGraphBuilder::StoreLane(MachineRepresentation mem_rep, Node* index,
uint32_t offset, uint32_t alignment,
Node* val, uint8_t laneidx,
wasm::WasmCodePosition position,
wasm::ValueType type) {
Node* store;
has_simd_ = true;
index = BoundsCheckMem(i::ElementSizeInBytes(mem_rep), index, offset,
position, kCanOmitBoundsCheck);
MachineType memtype = MachineType(mem_rep, MachineSemantic::kNone);
LoadKind load_kind = GetLoadKind(mcgraph(), memtype, use_trap_handler());
// {offset} is validated to be within uintptr_t range in {BoundsCheckMem}.
uintptr_t capped_offset = static_cast<uintptr_t>(offset);
store = SetEffect(graph()->NewNode(
mcgraph()->machine()->StoreLane(load_kind, mem_rep, laneidx),
MemBuffer(capped_offset), index, val, effect(), control()));
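// For protected (trap-handler) stores, record the source position so a
// faulting access can be mapped back to wasm code.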
if (load_kind == LoadKind::kProtected) {
SetSourcePosition(store, position);
}
if (FLAG_trace_wasm_memory) {
TraceMemoryOperation(true, mem_rep, index, capped_offset, position);
}
return store;
}
Node* WasmGraphBuilder::StoreMem(MachineRepresentation mem_rep, Node* index,
uint64_t offset, uint32_t alignment, Node* val,
wasm::WasmCodePosition position,
......
......@@ -325,6 +325,9 @@ class WasmGraphBuilder {
Node* StoreMem(MachineRepresentation mem_rep, Node* index, uint64_t offset,
uint32_t alignment, Node* val, wasm::WasmCodePosition position,
wasm::ValueType type);
Node* StoreLane(MachineRepresentation mem_rep, Node* index, uint32_t offset,
uint32_t alignment, Node* val, uint8_t laneidx,
wasm::WasmCodePosition position, wasm::ValueType type);
static void PrintDebugName(Node* node);
void set_instance_node(Node* instance_node) {
......
......@@ -2374,6 +2374,12 @@ class LiftoffCompiler {
}
}
void StoreLane(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm, const Value& index,
const Value& value, const uint8_t laneidx) {
unsupported(decoder, kSimd, "simd store lane");
}
void CurrentMemoryPages(FullDecoder* decoder, Value* result) {
Register mem_size = __ GetUnusedRegister(kGpReg, {}).gp();
LOAD_INSTANCE_FIELD(mem_size, MemorySize, kSystemPointerSize);
......
......@@ -983,6 +983,8 @@ struct ControlBase : public PcForErrors<validate> {
Value* result) \
F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \
const Value& index, const Value& value) \
F(StoreLane, StoreType type, const MemoryAccessImmediate<validate>& imm, \
const Value& index, const Value& value, const uint8_t laneidx) \
F(CurrentMemoryPages, Value* result) \
F(MemoryGrow, const Value& value, Value* result) \
F(CallDirect, const CallFunctionImmediate<validate>& imm, \
......@@ -1749,7 +1751,11 @@ class WasmDecoder : public Decoder {
case kExprS128Load8Lane:
case kExprS128Load16Lane:
case kExprS128Load32Lane:
case kExprS128Load64Lane: {
case kExprS128Load64Lane:
case kExprS128Store8Lane:
case kExprS128Store16Lane:
case kExprS128Store32Lane:
case kExprS128Store64Lane: {
MemoryAccessImmediate<validate> imm(decoder, pc + length,
UINT32_MAX);
// 1 more byte for lane index immediate.
......@@ -3334,6 +3340,20 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return opcode_length + mem_imm.length + lane_imm.length;
}
int DecodeStoreLane(StoreType type, uint32_t opcode_length) {
if (!CheckHasMemory()) return 0;
MemoryAccessImmediate<validate> mem_imm(this, this->pc_ + opcode_length,
type.size_log_2());
SimdLaneImmediate<validate> lane_imm(
this, this->pc_ + opcode_length + mem_imm.length);
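// The value to store is on top of the stack, above the i32 index.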
Value v128 = Pop(1, kWasmS128);
Value index = Pop(0, kWasmI32);
CALL_INTERFACE_IF_REACHABLE(StoreLane, type, mem_imm, index, v128,
lane_imm.lane);
return opcode_length + mem_imm.length + lane_imm.length;
}
int DecodeStoreMem(StoreType store, int prefix_len = 1) {
if (!CheckHasMemory()) return 0;
MemoryAccessImmediate<validate> imm(this, this->pc_ + prefix_len,
......@@ -3578,6 +3598,18 @@ class WasmFullDecoder : public WasmDecoder<validate> {
case kExprS128Load64Lane: {
return DecodeLoadLane(LoadType::kI64Load, opcode_length);
}
case kExprS128Store8Lane: {
return DecodeStoreLane(StoreType::kI32Store8, opcode_length);
}
case kExprS128Store16Lane: {
return DecodeStoreLane(StoreType::kI32Store16, opcode_length);
}
case kExprS128Store32Lane: {
return DecodeStoreLane(StoreType::kI32Store, opcode_length);
}
case kExprS128Store64Lane: {
return DecodeStoreLane(StoreType::kI64Store, opcode_length);
}
case kExprS128Const:
return SimdConstOp(opcode_length);
default: {
......
......@@ -450,6 +450,13 @@ class WasmGraphBuildingInterface {
value.node, decoder->position(), type.value_type());
}
void StoreLane(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm, const Value& index,
const Value& value, const uint8_t laneidx) {
BUILD(StoreLane, type.mem_rep(), index.node, imm.offset, imm.alignment,
value.node, laneidx, decoder->position(), type.value_type());
}
void CurrentMemoryPages(FullDecoder* decoder, Value* result) {
result->node = BUILD(CurrentMemoryPages);
}
......
......@@ -313,6 +313,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S128_OP(Load16Lane, "load16_lane")
CASE_S128_OP(Load32Lane, "load32_lane")
CASE_S128_OP(Load64Lane, "load64_lane")
CASE_S128_OP(Store8Lane, "store8_lane")
CASE_S128_OP(Store16Lane, "store16_lane")
CASE_S128_OP(Store32Lane, "store32_lane")
CASE_S128_OP(Store64Lane, "store64_lane")
CASE_I8x16_OP(RoundingAverageU, "avgr_u")
CASE_I16x8_OP(RoundingAverageU, "avgr_u")
......
......@@ -465,7 +465,11 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(S128Load8Lane, 0xfd58, s_is) \
V(S128Load16Lane, 0xfd59, s_is) \
V(S128Load32Lane, 0xfd5a, s_is) \
V(S128Load64Lane, 0xfd5b, s_is)
V(S128Load64Lane, 0xfd5b, s_is) \
V(S128Store8Lane, 0xfd5c, v_is) \
V(S128Store16Lane, 0xfd5d, v_is) \
V(S128Store32Lane, 0xfd5e, v_is) \
V(S128Store64Lane, 0xfd5f, v_is)
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \
......
......@@ -3688,6 +3688,81 @@ WASM_SIMD_TEST_NO_LOWERING(S128Load64Lane) {
RunLoadLaneTest<int64_t>(execution_tier, lower_simd, kExprS128Load64Lane,
kExprI64x2Splat);
}
template <typename T>
void RunStoreLaneTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode store_op, WasmOpcode splat_op) {
FLAG_SCOPE(wasm_simd_post_mvp);
if (execution_tier == TestExecutionTier::kLiftoff) {
// Not yet implemented.
return;
}
constexpr int lanes = kSimd128Size / sizeof(T);
constexpr int mem_index = 16; // Store from mem index 16 (bytes).
constexpr int splat_value = 33;
WasmOpcode const_op =
splat_op == kExprI64x2Splat ? kExprI64Const : kExprI32Const;
for (int lane_index = 0; lane_index < lanes; lane_index++) {
WasmRunner<int32_t> r(execution_tier, lower_simd);
T* memory = r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));
// Splat splat_value into all lanes, then store a single lane (lane_index)
// to memory at mem_index.
BUILD(r, WASM_I32V(mem_index), const_op, splat_value,
WASM_SIMD_OP(splat_op), WASM_SIMD_OP(store_op), ZERO_ALIGNMENT,
ZERO_OFFSET, lane_index, WASM_ONE);
r.builder().BlankMemory();
r.Call();
for (int i = 0; i < lanes; i++) {
CHECK_EQ(0, r.builder().ReadMemory(&memory[i]));
}
CHECK_EQ(splat_value, r.builder().ReadMemory(&memory[lanes]));
for (int i = lanes + 1; i < lanes * 2; i++) {
CHECK_EQ(0, r.builder().ReadMemory(&memory[i]));
}
}
// OOB stores
{
WasmRunner<int32_t, uint32_t> r(execution_tier, lower_simd);
r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));
BUILD(r, WASM_GET_LOCAL(0), const_op, splat_value, WASM_SIMD_OP(splat_op),
WASM_SIMD_OP(store_op), ZERO_ALIGNMENT, ZERO_OFFSET, 0, WASM_ONE);
// StoreLane stores sizeof(T) bytes.
for (uint32_t index = kWasmPageSize - (sizeof(T) - 1);
index < kWasmPageSize; ++index) {
CHECK_TRAP(r.Call(index));
}
}
}
WASM_SIMD_TEST_NO_LOWERING(S128Store8Lane) {
RunStoreLaneTest<int8_t>(execution_tier, lower_simd, kExprS128Store8Lane,
kExprI8x16Splat);
}
WASM_SIMD_TEST_NO_LOWERING(S128Store16Lane) {
RunStoreLaneTest<int16_t>(execution_tier, lower_simd, kExprS128Store16Lane,
kExprI16x8Splat);
}
WASM_SIMD_TEST_NO_LOWERING(S128Store32Lane) {
RunStoreLaneTest<int32_t>(execution_tier, lower_simd, kExprS128Store32Lane,
kExprI32x4Splat);
}
WASM_SIMD_TEST_NO_LOWERING(S128Store64Lane) {
RunStoreLaneTest<int64_t>(execution_tier, lower_simd, kExprS128Store64Lane,
kExprI64x2Splat);
}
#endif // V8_TARGET_ARCH_X64
#define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \
......
......@@ -2692,6 +2692,22 @@ class WasmInterpreterInternals {
return DoSimdLoadLane<int2, int64_t, int64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
case kExprS128Store8Lane: {
return DoSimdStoreLane<int16, int32_t, int8_t>(
decoder, code, pc, len, MachineRepresentation::kWord8);
}
case kExprS128Store16Lane: {
return DoSimdStoreLane<int8, int32_t, int16_t>(
decoder, code, pc, len, MachineRepresentation::kWord16);
}
case kExprS128Store32Lane: {
return DoSimdStoreLane<int4, int32_t, int32_t>(
decoder, code, pc, len, MachineRepresentation::kWord32);
}
case kExprS128Store64Lane: {
return DoSimdStoreLane<int2, int64_t, int64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
default:
return false;
}
......@@ -2770,6 +2786,30 @@ class WasmInterpreterInternals {
return true;
}
template <typename s_type, typename result_type, typename load_type>
bool DoSimdStoreLane(Decoder* decoder, InterpreterCode* code, pc_t pc,
int* const len, MachineRepresentation rep) {
// Extract a single lane, push it onto the stack, then store the lane.
s_type value = Pop().to_s128().to<s_type>();
MemoryAccessImmediate<Decoder::kNoValidation> imm(
decoder, code->at(pc + *len), sizeof(load_type));
SimdLaneImmediate<Decoder::kNoValidation> lane_imm(
decoder, code->at(pc + *len + imm.length));
Push(WasmValue(value.val[LANE(lane_imm.lane, value)]));
// ExecuteStore will update *len, so pass the current *len as the prefix
// length here.
if (!ExecuteStore<result_type, load_type>(decoder, code, pc, len, rep,
/*prefix_len=*/*len)) {
return false;
}
*len += lane_imm.length;
return true;
}
// Check if our control stack (frames_) exceeds the limit. Trigger stack
// overflow if it does, and unwinding the current frame.
// Returns true if execution can continue, false if the stack was fully
......