Commit 7ca9fc8e authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Prototype v128.load32_zero and v128.load64_zero

Prototype in TurboFan x64 and interpreter, bailout in Liftoff.

Suggested in https://github.com/WebAssembly/simd/pull/237.

Bug: v8:10713
Change-Id: I5346c351fb2ec5240b74013e62aef07c46d5d9b6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2300924
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68973}
parent 627b8781
......@@ -3839,6 +3839,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64S128LoadMem32Zero: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Movd(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64S128LoadMem64Zero: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ Movq(i.OutputSimd128Register(), i.MemoryOperand());
break;
}
case kX64S32x4Swizzle: {
DCHECK_EQ(2, instr->InputCount());
ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
......
......@@ -337,6 +337,8 @@ namespace compiler {
V(X64I32x4Load16x4U) \
V(X64I64x2Load32x2S) \
V(X64I64x2Load32x2U) \
V(X64S128LoadMem32Zero) \
V(X64S128LoadMem64Zero) \
V(X64S32x4Swizzle) \
V(X64S32x4Shuffle) \
V(X64S16x8Blend) \
......
......@@ -388,6 +388,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4Load16x4U:
case kX64I64x2Load32x2S:
case kX64I64x2Load32x2U:
case kX64S128LoadMem32Zero:
case kX64S128LoadMem64Zero:
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
case kX64Peek:
......
......@@ -360,6 +360,12 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
case LoadTransformation::kI64x2Load32x2U:
opcode = kX64I64x2Load32x2U;
break;
case LoadTransformation::kS128LoadMem32Zero:
opcode = kX64S128LoadMem32Zero;
break;
case LoadTransformation::kS128LoadMem64Zero:
opcode = kX64S128LoadMem64Zero;
break;
default:
UNREACHABLE();
}
......
......@@ -70,6 +70,10 @@ std::ostream& operator<<(std::ostream& os, LoadTransformation rep) {
return os << "kI64x2Load32x2S";
case LoadTransformation::kI64x2Load32x2U:
return os << "kI64x2Load32x2U";
case LoadTransformation::kS128LoadMem32Zero:
return os << "kS128LoadMem32Zero";
case LoadTransformation::kS128LoadMem64Zero:
return os << "kS128LoadMem64Zero";
}
UNREACHABLE();
}
......@@ -570,7 +574,9 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I32x4Load16x4S) \
V(I32x4Load16x4U) \
V(I64x2Load32x2S) \
V(I64x2Load32x2U)
V(I64x2Load32x2U) \
V(S128LoadMem32Zero) \
V(S128LoadMem64Zero)
#define ATOMIC_U32_TYPE_LIST(V) \
V(Uint8) \
......
......@@ -70,6 +70,8 @@ enum class LoadTransformation {
kI32x4Load16x4U,
kI64x2Load32x2S,
kI64x2Load32x2U,
kS128LoadMem32Zero,
kS128LoadMem64Zero,
};
size_t hash_value(LoadTransformation);
......
......@@ -3701,6 +3701,13 @@ LoadTransformation GetLoadTransformation(
}
break;
}
case wasm::LoadTransformationKind::kZeroExtend: {
if (memtype == MachineType::Int32()) {
return LoadTransformation::kS128LoadMem32Zero;
} else if (memtype == MachineType::Int64()) {
return LoadTransformation::kS128LoadMem64Zero;
}
}
}
UNREACHABLE();
}
......
......@@ -2170,6 +2170,11 @@ class LiftoffCompiler {
return;
}
if (transform == LoadTransformationKind::kZeroExtend) {
unsupported(decoder, kSimd, "prototyping s128 load zero extend");
return;
}
LiftoffRegList pinned;
Register index = pinned.set(__ PopToRegister()).gp();
// For load splats, LoadType is the size of the load, and for load
......
......@@ -1624,6 +1624,7 @@ class WasmDecoder : public Decoder {
return 2 + length;
#define DECLARE_OPCODE_CASE(name, opcode, sig) case kExpr##name:
FOREACH_SIMD_MEM_OPCODE(DECLARE_OPCODE_CASE)
FOREACH_SIMD_POST_MVP_MEM_OPCODE(DECLARE_OPCODE_CASE)
#undef DECLARE_OPCODE_CASE
{
MemoryAccessImmediate<validate> imm(decoder, pc + length + 1,
......@@ -2045,6 +2046,15 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return true;
}
// Gatekeeper for post-MVP SIMD opcodes. Returns true when {opcode} may be
// decoded. When --wasm-simd-post-mvp is off and {opcode} is a post-MVP SIMD
// opcode, records a decoder error and returns false.
bool CheckSimdPostMvp(WasmOpcode opcode) {
  // The flag is tested first so the opcode classification is only consulted
  // when the feature is disabled.
  const bool is_allowed =
      FLAG_wasm_simd_post_mvp || !WasmOpcodes::IsSimdPostMvpOpcode(opcode);
  if (is_allowed) return true;
  this->error("simd opcode not available, enable with --wasm-simd-post-mvp");
  return false;
}
#ifdef DEBUG
class TraceLine {
public:
......@@ -3305,6 +3315,20 @@ class WasmFullDecoder : public WasmDecoder<validate> {
return DecodeLoadMem(LoadType::kS128Load, opcode_length);
case kExprS128StoreMem:
return DecodeStoreMem(StoreType::kS128Store, opcode_length);
case kExprS128LoadMem32Zero:
if (!CheckSimdPostMvp(opcode)) {
return 0;
}
return DecodeLoadTransformMem(LoadType::kI32Load,
LoadTransformationKind::kZeroExtend,
opcode_length);
case kExprS128LoadMem64Zero:
if (!CheckSimdPostMvp(opcode)) {
return 0;
}
return DecodeLoadTransformMem(LoadType::kI64Load,
LoadTransformationKind::kZeroExtend,
opcode_length);
case kExprS8x16LoadSplat:
return DecodeLoadTransformMem(LoadType::kI32Load8S,
LoadTransformationKind::kSplat,
......@@ -3346,10 +3370,7 @@ class WasmFullDecoder : public WasmDecoder<validate> {
case kExprS128Const:
return SimdConstOp(opcode_length);
default: {
if (!FLAG_wasm_simd_post_mvp &&
WasmOpcodes::IsSimdPostMvpOpcode(opcode)) {
this->error(
"simd opcode not available, enable with --wasm-simd-post-mvp");
if (!CheckSimdPostMvp(opcode)) {
return 0;
}
const FunctionSig* sig = WasmOpcodes::Signature(opcode);
......
......@@ -34,7 +34,7 @@ struct FunctionBody {
: sig(sig), offset(offset), start(start), end(end) {}
};
enum class LoadTransformationKind : uint8_t { kSplat, kExtend };
// Kind of SIMD load transformation requested by the decoder. kZeroExtend is
// the kind used for v128.load32_zero and v128.load64_zero.
enum class LoadTransformationKind : uint8_t { kSplat, kExtend, kZeroExtend };
V8_EXPORT_PRIVATE DecodeResult VerifyWasmCode(AccountingAllocator* allocator,
const WasmFeatures& enabled,
......
......@@ -302,6 +302,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_SIMDF_OP(Qfma, "qfma")
CASE_SIMDF_OP(Qfms, "qfms")
CASE_S128_OP(LoadMem32Zero, "load32_zero")
CASE_S128_OP(LoadMem64Zero, "load64_zero")
CASE_S8x16_OP(LoadSplat, "load_splat")
CASE_S16x8_OP(LoadSplat, "load_splat")
CASE_S32x4_OP(LoadSplat, "load_splat")
......@@ -507,6 +509,7 @@ constexpr bool WasmOpcodes::IsSimdPostMvpOpcode(WasmOpcode opcode) {
switch (opcode) {
#define CHECK_OPCODE(name, opcode, _) case kExpr##name:
FOREACH_SIMD_POST_MVP_OPCODE(CHECK_OPCODE)
FOREACH_SIMD_POST_MVP_MEM_OPCODE(CHECK_OPCODE)
#undef CHECK_OPCODE
return true;
default:
......@@ -552,7 +555,7 @@ constexpr WasmOpcodeSig GetAsmJsOpcodeSigIndex(byte opcode) {
// Maps the low byte of a SIMD opcode to its signature index. Each opcode list
// is expanded through CASE into a chain of conditional expressions
// ("opcode == low_byte ? kSigEnum_<sig> :"), and the chain ends in
// kSigEnum_None for bytes that match no listed opcode.
constexpr WasmOpcodeSig GetSimdOpcodeSigIndex(byte opcode) {
#define CASE(name, opc, sig) opcode == (opc & 0xFF) ? kSigEnum_##sig:
return FOREACH_SIMD_0_OPERAND_OPCODE(CASE) FOREACH_SIMD_MEM_OPCODE(CASE)
kSigEnum_None;
FOREACH_SIMD_POST_MVP_MEM_OPCODE(CASE) kSigEnum_None;
#undef CASE
}
......
......@@ -438,6 +438,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F32x4SConvertI32x4, 0xfdfa, s_s) \
V(F32x4UConvertI32x4, 0xfdfb, s_s)
// Post-MVP SIMD memory opcodes (v128.load32_zero and v128.load64_zero).
// Each entry is V(name, opcode, signature).
#define FOREACH_SIMD_POST_MVP_MEM_OPCODE(V) \
V(S128LoadMem32Zero, 0xfdfc, s_i) \
V(S128LoadMem64Zero, 0xfdfd, s_i)
#define FOREACH_SIMD_POST_MVP_OPCODE(V) \
V(I8x16Mul, 0xfd75, s_ss) \
V(I8x16BitMask, 0xfd64, i_s) \
......@@ -459,8 +463,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(I64x2MinU, 0xfdd7, s_ss) \
V(I64x2MaxS, 0xfde2, s_ss) \
V(I64x2MaxU, 0xfdee, s_ss) \
V(F32x4Qfma, 0xfdfc, s_sss) \
V(F32x4Qfms, 0xfdfd, s_sss) \
V(F32x4Qfma, 0xfdb4, s_sss) \
V(F32x4Qfms, 0xfdd4, s_sss) \
V(F64x2Qfma, 0xfdfe, s_sss) \
V(F64x2Qfms, 0xfdff, s_sss) \
V(I16x8AddHoriz, 0xfdaf, s_ss) \
......@@ -641,6 +645,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
FOREACH_SIMD_1_OPERAND_OPCODE(V) \
FOREACH_SIMD_MASK_OPERAND_OPCODE(V) \
FOREACH_SIMD_MEM_OPCODE(V) \
FOREACH_SIMD_POST_MVP_MEM_OPCODE(V) \
FOREACH_SIMD_CONST_OPCODE(V) \
FOREACH_ATOMIC_OPCODE(V) \
FOREACH_ATOMIC_0_OPERAND_OPCODE(V) \
......
......@@ -3609,6 +3609,43 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Load32x2S) {
kExprI64x2Load32x2S);
}
// TODO(v8:10713): Prototyping v128.load32_zero and v128.load64_zero.
#if V8_TARGET_ARCH_X64
// Shared driver for the v128.load32_zero / v128.load64_zero tests.
//
// Builds a function that applies the zero-extending load {op} at byte offset
// 16 and stores the result in an s128 global, then checks that lane 0 holds
// the value from memory and every other lane is zero. S is the lane type
// (and therefore the width of the load).
template <typename S>
void RunLoadZeroTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                     WasmOpcode op) {
  FLAG_SCOPE(wasm_simd_post_mvp);
  constexpr int lanes_s = kSimd128Size / sizeof(S);
  constexpr int mem_index = 16;  // Load from mem index 16 (bytes).
  WasmRunner<int32_t> r(execution_tier, lower_simd);
  S* memory = r.builder().AddMemoryElems<S>(kWasmPageSize / sizeof(S));
  S* global = r.builder().AddGlobal<S>(kWasmS128);
  BUILD(
      r,
      WASM_SET_GLOBAL(0, WASM_SIMD_LOAD_ZERO_EXTEND(op, WASM_I32V(mem_index))),
      WASM_ONE);

  // memory[lanes_s] is the element at byte offset mem_index, i.e. the one
  // value the load is supposed to read.
  S sentinel = S{-1};
  r.builder().WriteMemory(&memory[lanes_s], sentinel);
  // Put non-zero data in the remainder of the 16-byte window that follows.
  // Without this, the "other lanes are zero" check below would also pass for
  // an implementation that loads all 16 bytes whenever the neighbouring
  // memory happens to be zero.
  S filler = S{0x5A};
  for (int i = 1; i < lanes_s; i++) {
    r.builder().WriteMemory(&memory[lanes_s + i], filler);
  }

  r.Call();

  // Only first lane is set to sentinel.
  CHECK_EQ(sentinel, ReadLittleEndianValue<S>(&global[0]));
  // The other lanes are zero.
  for (int i = 1; i < lanes_s; i++) {
    CHECK_EQ(S{0}, ReadLittleEndianValue<S>(&global[i]));
  }
}
// v128.load32_zero: runs the shared load-zero driver with 32-bit lanes.
WASM_SIMD_TEST_NO_LOWERING(S128LoadMem32Zero) {
RunLoadZeroTest<int32_t>(execution_tier, lower_simd, kExprS128LoadMem32Zero);
}
// v128.load64_zero: runs the shared load-zero driver with 64-bit lanes.
WASM_SIMD_TEST_NO_LOWERING(S128LoadMem64Zero) {
RunLoadZeroTest<int64_t>(execution_tier, lower_simd, kExprS128LoadMem64Zero);
}
#endif // V8_TARGET_ARCH_X64
#define WASM_SIMD_ANYTRUE_TEST(format, lanes, max, param_type) \
WASM_SIMD_TEST(S##format##AnyTrue) { \
FLAG_SCOPE(wasm_simd_post_mvp); \
......@@ -3901,6 +3938,7 @@ WASM_EXTRACT_I16x8_TEST(S, UINT16) WASM_EXTRACT_I16x8_TEST(I, INT16)
#undef WASM_SIMD_F32x4_QFMS
#undef WASM_SIMD_LOAD_SPLAT
#undef WASM_SIMD_LOAD_EXTEND
#undef WASM_SIMD_LOAD_ZERO_EXTEND
} // namespace test_run_wasm_simd
} // namespace wasm
......
......@@ -2671,6 +2671,14 @@ class WasmInterpreterInternals {
return DoSimdLoadExtend<int2, uint64_t, uint32_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
case kExprS128LoadMem32Zero: {
return DoSimdLoadZeroExtend<int4, uint32_t>(
decoder, code, pc, len, MachineRepresentation::kWord32);
}
case kExprS128LoadMem64Zero: {
return DoSimdLoadZeroExtend<int2, uint64_t>(
decoder, code, pc, len, MachineRepresentation::kWord64);
}
default:
return false;
}
......@@ -2714,6 +2722,23 @@ class WasmInterpreterInternals {
return true;
}
// Interpreter implementation of the zero-extending SIMD loads.
//
// Runs a scalar load of {load_type} via ExecuteLoad, pops the loaded value,
// and pushes a Simd128 of shape {s_type} whose lane 0 is that value and whose
// remaining lanes are 0. Returns false if ExecuteLoad fails, true otherwise.
template <typename s_type, typename load_type>
bool DoSimdLoadZeroExtend(Decoder* decoder, InterpreterCode* code, pc_t pc,
                          int* const len, MachineRepresentation rep) {
  // The current value of *len is handed to ExecuteLoad as the prefix length.
  const int prefix_len = *len;
  const bool load_ok = ExecuteLoad<load_type, load_type>(decoder, code, pc, len,
                                                         rep, prefix_len);
  if (!load_ok) return false;

  const load_type scalar = Pop().to<load_type>();
  s_type vec;
  // Lane 0 takes the loaded scalar; all other lanes are cleared.
  for (size_t lane = 0; lane < arraysize(vec.val); lane++) {
    vec.val[LANE(lane, vec)] = (lane == 0) ? scalar : 0;
  }
  Push(WasmValue(Simd128(vec)));
  return true;
}
// Check if our control stack (frames_) exceeds the limit. Trigger stack
// overflow if it does, and unwinding the current frame.
// Returns true if execution can continue, false if the stack was fully
......
......@@ -872,6 +872,8 @@ inline WasmOpcode LoadStoreOpcodeOf(MachineType type, bool store) {
index, WASM_SIMD_OP(opcode), ZERO_ALIGNMENT, offset
#define WASM_SIMD_LOAD_EXTEND_ALIGNMENT(opcode, index, alignment) \
index, WASM_SIMD_OP(opcode), alignment, ZERO_OFFSET
// Byte sequence for a zero-extending SIMD load: the {index} expression,
// followed by the SIMD {opcode} with ZERO_ALIGNMENT and ZERO_OFFSET.
#define WASM_SIMD_LOAD_ZERO_EXTEND(opcode, index) \
index, WASM_SIMD_OP(opcode), ZERO_ALIGNMENT, ZERO_OFFSET
//------------------------------------------------------------------------------
// Compilation Hints.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment