Commit 465c97fe authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement load_splat and load_extend

Introduce a new operator, LoadTransform, that holds a LoadTransformParameters
param describing the kind of load (normal, unaligned, protected) and the
transformation (splat or extend, signed or unsigned).

We add a new method that a full decoder needs to implement, LoadTransform,
which reuses the existing LoadType but also takes a LoadTransformationKind
to distinguish between splats and extends at the decoder level.

This implements 4 of the 10 suggested load splat/extend operations (to keep
the CL smaller). Interpreter support is still missing and will be added in a
future change.



Change-Id: I1e65c693bfbe30e2a511c81b5a32e06aacbddc19
Bug: v8:9886
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1863863
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64819}
parent bebe6314
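
For orientation, here is a scalar sketch (not part of the CL; the helper
names are illustrative) of what the four implemented operations compute:

// Illustrative scalar semantics of the four new operations.
#include <cstdint>

// s8x16.load_splat: load one byte and replicate it into all 16 lanes.
void S8x16LoadSplat(const uint8_t* mem, int8_t out[16]) {
  for (int i = 0; i < 16; ++i) out[i] = static_cast<int8_t>(mem[0]);
}

// i16x8.load8x8_s: load 8 bytes and sign-extend each to a 16-bit lane.
void I16x8Load8x8S(const uint8_t* mem, int16_t out[8]) {
  for (int i = 0; i < 8; ++i) out[i] = static_cast<int8_t>(mem[i]);
}

// i16x8.load8x8_u: load 8 bytes and zero-extend each to a 16-bit lane.
void I16x8Load8x8U(const uint8_t* mem, uint16_t out[8]) {
  for (int i = 0; i < 8; ++i) out[i] = mem[i];
}

// s16x8.load_splat is analogous: one 16-bit load replicated into 8 lanes.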
......@@ -1393,6 +1393,10 @@ void InstructionSelector::VisitNode(Node* node) {
MarkAsRepresentation(type.representation(), node);
return VisitLoad(node);
}
case IrOpcode::kLoadTransform: {
MarkAsRepresentation(MachineRepresentation::kSimd128, node);
return VisitLoadTransform(node);
}
case IrOpcode::kPoisonedLoad: {
LoadRepresentation type = LoadRepresentationOf(node->op());
MarkAsRepresentation(type.representation(), node);
......@@ -2662,6 +2666,7 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
#endif // !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2SConvertI64x2(Node* node) {
UNIMPLEMENTED();
}
......
......@@ -616,6 +616,7 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
// Visit the load node with a value and opcode to replace with.
void VisitLoad(Node* node, Node* value, InstructionCode opcode);
void VisitLoadTransform(Node* node, Node* value, InstructionCode opcode);
void VisitFinishRegion(Node* node);
void VisitParameter(Node* node);
void VisitIfException(Node* node);
......
......@@ -3688,6 +3688,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movq(rsp, tmp);
break;
}
case kX64S8x16LoadSplat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
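// Load the byte into lane 0, zero kScratchDoubleReg, then use it as an
// all-zero shuffle mask so pshufb broadcasts byte 0 into all 16 lanes.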
__ pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
__ pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
break;
}
case kX64S16x8LoadSplat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
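// Load the word into lane 0; pshuflw with imm8 0 copies word 0 into the
// low four lanes, and punpcklqdq duplicates the low quadword into the high.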
__ pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
__ pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(), 0);
__ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
break;
}
case kX64I16x8Load8x8S: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
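// pmovsxbw sign-extends eight bytes from memory into eight 16-bit lanes.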
__ pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64I16x8Load8x8U: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
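// pmovzxbw zero-extends eight bytes from memory into eight 16-bit lanes.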
__ pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64S32x4Swizzle: {
DCHECK_EQ(2, instr->InputCount());
ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
......
......@@ -310,6 +310,10 @@ namespace compiler {
V(X64S128Select) \
V(X64S8x16Swizzle) \
V(X64S8x16Shuffle) \
V(X64S8x16LoadSplat) \
V(X64S16x8LoadSplat) \
V(X64I16x8Load8x8S) \
V(X64I16x8Load8x8U) \
V(X64S32x4Swizzle) \
V(X64S32x4Shuffle) \
V(X64S16x8Blend) \
......
......@@ -361,6 +361,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Movsd:
case kX64Movss:
case kX64Movdqu:
case kX64S8x16LoadSplat:
case kX64S16x8LoadSplat:
case kX64I16x8Load8x8S:
case kX64I16x8Load8x8U:
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
case kX64Peek:
......
......@@ -328,6 +328,34 @@ void InstructionSelector::VisitAbortCSAAssert(Node* node) {
Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), rdx));
}
void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
ArchOpcode opcode = kArchNop;
switch (params.transformation) {
case LoadTransformation::kS8x16LoadSplat:
opcode = kX64S8x16LoadSplat;
break;
case LoadTransformation::kS16x8LoadSplat:
opcode = kX64S16x8LoadSplat;
break;
case LoadTransformation::kI16x8Load8x8S:
opcode = kX64I16x8Load8x8S;
break;
case LoadTransformation::kI16x8Load8x8U:
opcode = kX64I16x8Load8x8U;
break;
default:
UNREACHABLE();
}
// x64 supports unaligned loads
DCHECK_NE(params.kind, LoadKind::kUnaligned);
InstructionCode code = opcode;
if (params.kind == LoadKind::kProtected) {
code |= MiscField::encode(kMemoryAccessProtected);
}
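// Reuse the scalar load path below for addressing-mode selection; the
// LoadTransform node itself supplies the base and index inputs.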
VisitLoad(node, node, code);
}
void InstructionSelector::VisitLoad(Node* node, Node* value,
InstructionCode opcode) {
X64OperandGenerator g(this);
......
......@@ -32,6 +32,56 @@ std::ostream& operator<<(std::ostream& os, StoreRepresentation rep) {
return os << rep.representation() << ", " << rep.write_barrier_kind();
}
size_t hash_value(LoadKind kind) { return static_cast<size_t>(kind); }
std::ostream& operator<<(std::ostream& os, LoadKind kind) {
switch (kind) {
case LoadKind::kNormal:
return os << "kNormal";
case LoadKind::kUnaligned:
return os << "kUnaligned";
case LoadKind::kProtected:
return os << "kProtected";
}
UNREACHABLE();
}
size_t hash_value(LoadTransformation rep) { return static_cast<size_t>(rep); }
std::ostream& operator<<(std::ostream& os, LoadTransformation rep) {
switch (rep) {
case LoadTransformation::kS8x16LoadSplat:
return os << "kS8x16LoadSplat";
case LoadTransformation::kS16x8LoadSplat:
return os << "kS16x8LoadSplat";
case LoadTransformation::kI16x8Load8x8S:
return os << "kI16x8Load8x8S";
case LoadTransformation::kI16x8Load8x8U:
return os << "kI16x8Load8x8U";
}
UNREACHABLE();
}
size_t hash_value(LoadTransformParameters params) {
return base::hash_combine(params.kind, params.transformation);
}
std::ostream& operator<<(std::ostream& os, LoadTransformParameters params) {
return os << "(" << params.kind << " " << params.transformation << ")";
}
LoadTransformParameters const& LoadTransformParametersOf(Operator const* op) {
DCHECK_EQ(IrOpcode::kLoadTransform, op->opcode());
return OpParameter<LoadTransformParameters>(op);
}
bool operator==(LoadTransformParameters lhs, LoadTransformParameters rhs) {
return lhs.transformation == rhs.transformation && lhs.kind == rhs.kind;
}
bool operator!=(LoadTransformParameters lhs, LoadTransformParameters rhs) {
return !(lhs == rhs);
}
LoadRepresentation LoadRepresentationOf(Operator const* op) {
DCHECK(IrOpcode::kLoad == op->opcode() ||
......@@ -472,6 +522,12 @@ MachineType AtomicOpType(Operator const* op) {
V(kCompressedPointer) \
V(kCompressed)
#define LOAD_TRANSFORM_LIST(V) \
V(S8x16LoadSplat) \
V(S16x8LoadSplat) \
V(I16x8Load8x8S) \
V(I16x8Load8x8U)
#define ATOMIC_U32_TYPE_LIST(V) \
V(Uint8) \
V(Uint16) \
......@@ -587,6 +643,28 @@ struct MachineOperatorGlobalCache {
MACHINE_TYPE_LIST(LOAD)
#undef LOAD
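// As for LOAD above, cache one singleton operator per
// (LoadKind, LoadTransformation) pair.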
#define LOAD_TRANSFORM_KIND(TYPE, KIND) \
struct KIND##LoadTransform##TYPE##Operator final \
: public Operator1<LoadTransformParameters> { \
KIND##LoadTransform##TYPE##Operator() \
: Operator1<LoadTransformParameters>( \
IrOpcode::kLoadTransform, \
Operator::kNoDeopt | Operator::kNoThrow | Operator::kNoWrite, \
#KIND "LoadTransform", 2, 1, 1, 1, 1, 0, \
LoadTransformParameters{LoadKind::k##KIND, \
LoadTransformation::k##TYPE}) {} \
}; \
KIND##LoadTransform##TYPE##Operator k##KIND##LoadTransform##TYPE;
#define LOAD_TRANSFORM(TYPE) \
LOAD_TRANSFORM_KIND(TYPE, Normal) \
LOAD_TRANSFORM_KIND(TYPE, Unaligned) \
LOAD_TRANSFORM_KIND(TYPE, Protected)
LOAD_TRANSFORM_LIST(LOAD_TRANSFORM)
#undef LOAD_TRANSFORM
#undef LOAD_TRANSFORM_KIND
#define STACKSLOT(Size, Alignment) \
struct StackSlotOfSize##Size##OfAlignment##Alignment##Operator final \
: public StackSlotOperator { \
......@@ -1017,6 +1095,23 @@ const Operator* MachineOperatorBuilder::ProtectedLoad(LoadRepresentation rep) {
UNREACHABLE();
}
const Operator* MachineOperatorBuilder::LoadTransform(
LoadKind kind, LoadTransformation transform) {
#define LOAD_TRANSFORM_KIND(TYPE, KIND) \
if (kind == LoadKind::k##KIND && transform == LoadTransformation::k##TYPE) { \
return &cache_.k##KIND##LoadTransform##TYPE; \
}
#define LOAD_TRANSFORM(TYPE) \
LOAD_TRANSFORM_KIND(TYPE, Normal) \
LOAD_TRANSFORM_KIND(TYPE, Unaligned) \
LOAD_TRANSFORM_KIND(TYPE, Protected)
LOAD_TRANSFORM_LIST(LOAD_TRANSFORM)
#undef LOAD_TRANSFORM
#undef LOAD_TRANSFORM_KIND
UNREACHABLE();
}
const Operator* MachineOperatorBuilder::StackSlot(int size, int alignment) {
DCHECK_LE(0, size);
DCHECK(alignment == 0 || alignment == 4 || alignment == 8 || alignment == 16);
......@@ -1420,6 +1515,7 @@ StackCheckKind StackCheckKindOf(Operator const* op) {
#undef ATOMIC64_REPRESENTATION_LIST
#undef SIMD_LANE_OP_LIST
#undef STACK_SLOT_CACHED_SIZES_ALIGNMENTS_LIST
#undef LOAD_TRANSFORM_LIST
} // namespace compiler
} // namespace internal
......
......@@ -50,6 +50,40 @@ using LoadRepresentation = MachineType;
V8_EXPORT_PRIVATE LoadRepresentation LoadRepresentationOf(Operator const*)
V8_WARN_UNUSED_RESULT;
enum class LoadKind {
kNormal,
kUnaligned,
kProtected,
};
size_t hash_value(LoadKind);
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, LoadKind);
enum class LoadTransformation {
kS8x16LoadSplat,
kS16x8LoadSplat,
kI16x8Load8x8S,
kI16x8Load8x8U,
};
size_t hash_value(LoadTransformation);
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, LoadTransformation);
struct LoadTransformParameters {
LoadKind kind;
LoadTransformation transformation;
};
size_t hash_value(LoadTransformParameters);
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
LoadTransformParameters);
V8_EXPORT_PRIVATE LoadTransformParameters const& LoadTransformParametersOf(
Operator const*) V8_WARN_UNUSED_RESULT;
// A Store needs a MachineType and a WriteBarrierKind in order to emit the
// correct write barrier.
class StoreRepresentation final {
......@@ -668,6 +702,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* PoisonedLoad(LoadRepresentation rep);
const Operator* ProtectedLoad(LoadRepresentation rep);
const Operator* LoadTransform(LoadKind kind, LoadTransformation transform);
// store [base + index], value
const Operator* Store(StoreRepresentation rep);
const Operator* ProtectedStore(MachineRepresentation rep);
......
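As a hedged illustration of the new builder method (a sketch under the
operator shape defined in machine-operator.cc, not code from this CL): a
LoadTransform node takes two value inputs (base and index), one effect and
one control input, and produces one value and one effect, so a consumer
wires it up like any other load. BuildS8x16LoadSplat is a hypothetical
helper mirroring the wasm-compiler change below:

// Hypothetical helper, not part of the CL: builds a LoadTransform node
// for an s8x16.load_splat with a normal (aligned, unprotected) load.
Node* BuildS8x16LoadSplat(MachineGraph* mcgraph, Graph* graph, Node* base,
                          Node* index, Node* effect, Node* control) {
  const Operator* op = mcgraph->machine()->LoadTransform(
      LoadKind::kNormal, LoadTransformation::kS8x16LoadSplat);
  return graph->NewNode(op, base, index, effect, control);
}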
......@@ -932,7 +932,8 @@
V(S1x8AnyTrue) \
V(S1x8AllTrue) \
V(S1x16AnyTrue) \
V(S1x16AllTrue)
V(S1x16AllTrue) \
V(LoadTransform)
#define VALUE_OP_LIST(V) \
COMMON_OP_LIST(V) \
......
......@@ -3739,6 +3739,77 @@ Node* WasmGraphBuilder::TraceMemoryOperation(bool is_store,
return call;
}
namespace {
LoadTransformation GetLoadTransformation(
MachineType memtype, wasm::LoadTransformationKind transform) {
switch (transform) {
case wasm::LoadTransformationKind::kSplat: {
if (memtype == MachineType::Int8()) {
return LoadTransformation::kS8x16LoadSplat;
} else if (memtype == MachineType::Int16()) {
return LoadTransformation::kS16x8LoadSplat;
}
break;
}
case wasm::LoadTransformationKind::kExtend: {
if (memtype == MachineType::Int8()) {
return LoadTransformation::kI16x8Load8x8S;
} else if (memtype == MachineType::Uint8()) {
return LoadTransformation::kI16x8Load8x8U;
}
break;
}
}
UNREACHABLE();
}
LoadKind GetLoadKind(MachineGraph* mcgraph, MachineType memtype,
bool use_trap_handler) {
if (memtype.representation() == MachineRepresentation::kWord8 ||
mcgraph->machine()->UnalignedLoadSupported(memtype.representation())) {
if (use_trap_handler) {
return LoadKind::kProtected;
}
return LoadKind::kNormal;
}
// TODO(eholk): Support unaligned loads with trap handlers.
DCHECK(!use_trap_handler);
return LoadKind::kUnaligned;
}
} // namespace
Node* WasmGraphBuilder::LoadTransform(MachineType memtype,
wasm::LoadTransformationKind transform,
Node* index, uint32_t offset,
uint32_t alignment,
wasm::WasmCodePosition position) {
// Wasm semantics throw on OOB. Introduce explicit bounds check and
// conditioning when not using the trap handler.
index = BoundsCheckMem(wasm::ValueTypes::MemSize(memtype), index, offset,
position, kCanOmitBoundsCheck);
LoadTransformation transformation = GetLoadTransformation(memtype, transform);
LoadKind load_kind = GetLoadKind(mcgraph(), memtype, use_trap_handler());
Node* load = SetEffect(graph()->NewNode(
mcgraph()->machine()->LoadTransform(load_kind, transformation),
MemBuffer(offset), index, Effect(), Control()));
if (load_kind == LoadKind::kProtected) {
SetSourcePosition(load, position);
}
#if defined(V8_TARGET_BIG_ENDIAN)
load = BuildChangeEndiannessLoad(load, memtype, wasm::ValueType::kSimd128);
#endif
if (FLAG_trace_wasm_memory) {
TraceMemoryOperation(false, memtype.representation(), index, offset,
position);
}
return load;
}
Node* WasmGraphBuilder::LoadMem(wasm::ValueType type, MachineType memtype,
Node* index, uint32_t offset,
uint32_t alignment,
......
......@@ -44,6 +44,7 @@ using TFNode = compiler::Node;
using TFGraph = compiler::MachineGraph;
class WasmCode;
struct WasmFeatures;
enum class LoadTransformationKind : uint8_t;
} // namespace wasm
namespace compiler {
......@@ -294,6 +295,10 @@ class WasmGraphBuilder {
Node* LoadMem(wasm::ValueType type, MachineType memtype, Node* index,
uint32_t offset, uint32_t alignment,
wasm::WasmCodePosition position);
Node* LoadTransform(MachineType memtype,
wasm::LoadTransformationKind transform, Node* index,
uint32_t offset, uint32_t alignment,
wasm::WasmCodePosition position);
Node* StoreMem(MachineRepresentation mem_rep, Node* index, uint32_t offset,
uint32_t alignment, Node* val, wasm::WasmCodePosition position,
wasm::ValueType type);
......
......@@ -1765,6 +1765,12 @@ class LiftoffCompiler {
offset, decoder->position());
}
}
void LoadTransform(FullDecoder* decoder, LoadType type,
LoadTransformationKind transform,
const MemoryAccessImmediate<validate>& imm,
const Value& index_val, Value* result) {
unsupported(decoder, kSimd, "simd");
}
void StoreMem(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm,
......
......@@ -683,6 +683,11 @@ struct ControlBase {
}
};
enum class LoadTransformationKind : uint8_t {
kSplat,
kExtend,
};
// This is the list of callback functions that an interface for the
// WasmFullDecoder should implement.
// F(Name, args...)
......@@ -733,6 +738,8 @@ struct ControlBase {
F(Else, Control* if_block) \
F(LoadMem, LoadType type, const MemoryAccessImmediate<validate>& imm, \
const Value& index, Value* result) \
F(LoadTransform, LoadType type, LoadTransformationKind transform, \
const MemoryAccessImmediate<validate>& imm, const Value& index, Value* result) \
F(StoreMem, StoreType type, const MemoryAccessImmediate<validate>& imm, \
const Value& index, const Value& value) \
F(CurrentMemoryPages, Value* result) \
......@@ -2556,6 +2563,16 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return imm.length;
}
int DecodeLoadTransformMem(LoadType type, LoadTransformationKind transform) {
if (!CheckHasMemory()) return 0;
MemoryAccessImmediate<validate> imm(this, this->pc_ + 1, type.size_log_2());
auto index = Pop(0, kWasmI32);
auto* result = Push(ValueType::kWasmS128);
CALL_INTERFACE_IF_REACHABLE(LoadTransform, type, transform, imm, index,
result);
return imm.length;
}
int DecodeStoreMem(StoreType store, int prefix_len = 0) {
if (!CheckHasMemory()) return 0;
MemoryAccessImmediate<validate> imm(this, this->pc_ + prefix_len,
......@@ -2738,6 +2755,22 @@ class WasmFullDecoder : public WasmDecoder<validate> {
case kExprS128StoreMem:
len = DecodeStoreMem(StoreType::kS128Store, 1);
break;
case kExprS8x16LoadSplat:
len = DecodeLoadTransformMem(LoadType::kI32Load8S,
LoadTransformationKind::kSplat);
break;
case kExprS16x8LoadSplat:
len = DecodeLoadTransformMem(LoadType::kI32Load16S,
LoadTransformationKind::kSplat);
break;
case kExprI16x8Load8x8S:
len = DecodeLoadTransformMem(LoadType::kI32Load8S,
LoadTransformationKind::kExtend);
break;
case kExprI16x8Load8x8U:
len = DecodeLoadTransformMem(LoadType::kI32Load8U,
LoadTransformationKind::kExtend);
break;
default: {
FunctionSig* sig = WasmOpcodes::Signature(opcode);
if (!VALIDATE(sig != nullptr)) {
......
......@@ -392,6 +392,14 @@ class WasmGraphBuildingInterface {
imm.offset, imm.alignment, decoder->position());
}
void LoadTransform(FullDecoder* decoder, LoadType type,
LoadTransformationKind transform,
const MemoryAccessImmediate<validate>& imm,
const Value& index, Value* result) {
result->node = BUILD(LoadTransform, type.mem_type(), transform, index.node,
imm.offset, imm.alignment, decoder->position());
}
void StoreMem(FullDecoder* decoder, StoreType type,
const MemoryAccessImmediate<validate>& imm, const Value& index,
const Value& value) {
......
......@@ -322,6 +322,11 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F32x4_OP(Qfma, "qfma")
CASE_F32x4_OP(Qfms, "qfms")
CASE_S8x16_OP(LoadSplat, "load_splat")
CASE_S16x8_OP(LoadSplat, "load_splat")
CASE_I16x8_OP(Load8x8S, "load8x8_s")
CASE_I16x8_OP(Load8x8U, "load8x8_u")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")
......
......@@ -423,6 +423,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(S8x16Swizzle, 0xfdc0, s_ss) \
V(F64x2SConvertI64x2, 0xfdb1, s_s) \
V(F64x2UConvertI64x2, 0xfdb2, s_s) \
V(S8x16LoadSplat, 0xfdc2, s_i) \
V(S16x8LoadSplat, 0xfdc3, s_i) \
V(I8x16SConvertI16x8, 0xfdc6, s_ss) \
V(I8x16UConvertI16x8, 0xfdc7, s_ss) \
V(I16x8SConvertI32x4, 0xfdc8, s_ss) \
......@@ -435,6 +437,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(I32x4SConvertI16x8High, 0xfdcf, s_s) \
V(I32x4UConvertI16x8Low, 0xfdd0, s_s) \
V(I32x4UConvertI16x8High, 0xfdd1, s_s) \
V(I16x8Load8x8S, 0xfdd2, s_i) \
V(I16x8Load8x8U, 0xfdd3, s_i) \
V(I16x8AddHoriz, 0xfdbd, s_ss) \
V(I32x4AddHoriz, 0xfdbe, s_ss) \
V(F32x4AddHoriz, 0xfdbf, s_ss) \
......
......@@ -315,8 +315,26 @@ class ValueHelper {
static constexpr Vector<const uint32_t> ror_vector() {
return ArrayVector(ror_array);
}
template <typename T>
static constexpr Vector<const T> GetVector();
};
template <>
constexpr Vector<const int8_t> ValueHelper::GetVector() {
return int8_vector();
}
template <>
constexpr Vector<const uint8_t> ValueHelper::GetVector() {
return uint8_vector();
}
template <>
constexpr Vector<const int16_t> ValueHelper::GetVector() {
return int16_vector();
}
// Helper macros that can be used in FOR_INT32_INPUTS(i) { ... i ... }
#define FOR_INPUTS(ctype, itype, var) \
for (ctype var : ::v8::internal::compiler::ValueHelper::itype##_vector())
......
......@@ -470,6 +470,11 @@ bool ExpectFused(ExecutionTier tier) {
#define WASM_SIMD_F32x4_QFMA(a, b, c) a, b, c, WASM_SIMD_OP(kExprF32x4Qfma)
#define WASM_SIMD_F32x4_QFMS(a, b, c) a, b, c, WASM_SIMD_OP(kExprF32x4Qfms)
#define WASM_SIMD_LOAD_SPLAT(opcode, index) \
index, WASM_SIMD_OP(opcode), ZERO_ALIGNMENT, ZERO_OFFSET
#define WASM_SIMD_LOAD_EXTEND(opcode, index) \
index, WASM_SIMD_OP(opcode), ZERO_ALIGNMENT, ZERO_OFFSET
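// Both macros expand identically (fixed alignment 0 and offset 0); two
// names are kept only for readability at the use sites.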
// Runs tests of compiled code, using the interpreter as a reference.
#define WASM_SIMD_COMPILED_TEST(name) \
void RunWasm_##name##_Impl(LowerSimd lower_simd, \
......@@ -3248,6 +3253,79 @@ WASM_SIMD_TEST(SimdLoadStoreLoadMemargOffset) {
}
}
#if V8_TARGET_ARCH_X64
template <typename T>
void RunLoadSplatTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode op) {
if (execution_tier == ExecutionTier::kInterpreter) {
// TODO(zhin): implement for interpreter
return;
}
constexpr int lanes = 16 / sizeof(T);
constexpr int mem_index = 16; // Load from mem index 16 (bytes).
WasmRunner<int32_t> r(execution_tier, lower_simd);
T* memory = r.builder().AddMemoryElems<T>(kWasmPageSize / sizeof(T));
T* global = r.builder().AddGlobal<T>(kWasmS128);
BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_LOAD_SPLAT(op, WASM_I32V(mem_index))),
WASM_ONE);
for (T x : compiler::ValueHelper::GetVector<T>()) {
// mem_index is in bytes, so byte 16 is element `lanes` of the T-typed memory.
r.builder().WriteMemory(&memory[lanes], x);
r.Call();
for (int i = 0; i < lanes; i++) {
CHECK_EQ(x, ReadLittleEndianValue<T>(&global[i]));
}
}
}
WASM_SIMD_TEST_NO_LOWERING(S8x16LoadSplat) {
RunLoadSplatTest<int8_t>(execution_tier, lower_simd, kExprS8x16LoadSplat);
}
WASM_SIMD_TEST_NO_LOWERING(S16x8LoadSplat) {
RunLoadSplatTest<int16_t>(execution_tier, lower_simd, kExprS16x8LoadSplat);
}
template <typename S, typename T>
void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode op) {
if (execution_tier == ExecutionTier::kInterpreter) {
// TODO(zhin): implement for interpreter
return;
}
constexpr int lanes_s = 16 / sizeof(S);
constexpr int lanes_t = 16 / sizeof(T);
constexpr int mem_index = 16; // Load from mem index 16 (bytes).
WasmRunner<int32_t> r(execution_tier, lower_simd);
S* memory = r.builder().AddMemoryElems<S>(kWasmPageSize / sizeof(S));
T* global = r.builder().AddGlobal<T>(kWasmS128);
BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_LOAD_EXTEND(op, WASM_I32V(mem_index))),
WASM_ONE);
for (S x : compiler::ValueHelper::GetVector<S>()) {
for (int i = 0; i < lanes_s; i++) {
// mem_index is in bytes, so byte 16 is element `lanes_s` of the S-typed
// memory; write lanes_s consecutive elements starting there.
r.builder().WriteMemory(&memory[lanes_s + i], x);
}
r.Call();
for (int i = 0; i < lanes_t; i++) {
CHECK_EQ(static_cast<T>(x), ReadLittleEndianValue<T>(&global[i]));
}
}
}
WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8U) {
RunLoadExtendTest<uint8_t, uint16_t>(execution_tier, lower_simd,
kExprI16x8Load8x8U);
}
WASM_SIMD_TEST_NO_LOWERING(I16x8Load8x8S) {
RunLoadExtendTest<int8_t, int16_t>(execution_tier, lower_simd,
kExprI16x8Load8x8S);
}
#endif // V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM64 || \
V8_TARGET_ARCH_ARM
#define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \
......@@ -3469,6 +3547,8 @@ WASM_EXTRACT_I16x8_TEST(S, UINT16) WASM_EXTRACT_I16x8_TEST(I, INT16)
#undef WASM_SIMD_F64x2_QFMS
#undef WASM_SIMD_F32x4_QFMA
#undef WASM_SIMD_F32x4_QFMS
#undef WASM_SIMD_LOAD_SPLAT
#undef WASM_SIMD_LOAD_EXTEND
} // namespace test_run_wasm_simd
} // namespace wasm
......