Commit 43244a06 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement remaining load_splat for x64

Implements v32x4.load_splat and v64x2.load_splat.

Bug: v8:9886
Change-Id: I18f3b012f9980d258985edf2ff26577fe495eff5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1903747
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64866}
parent 35addcaa
...@@ -3702,6 +3702,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3702,6 +3702,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register()); __ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
break; break;
} }
// Code generation for the 32x4 load-splat opcode: load one 32-bit value from
// memory and replicate it across the output SIMD register.
case kX64S32x4LoadSplat: {
// Called before the load is emitted, passing the current pc offset.
// NOTE(review): presumably this records an out-of-line trap for the memory
// access that follows; the helper is defined outside this hunk -- confirm.
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
// TODO(v8:9886): AVX codegen
// movss with a memory source loads 32 bits into the low element of the
// output register (the remaining elements are zeroed).
__ movss(i.OutputSimd128Register(), i.MemoryOperand());
// shufps of the register with itself and an all-zero selector picks element
// 0 for every one of the four result elements, i.e. broadcasts the value.
__ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
static_cast<byte>(0));
break;
}
// Code generation for the 64x2 load-splat opcode: load one 64-bit value from
// memory and replicate it into both halves of the output SIMD register.
case kX64S64x2LoadSplat: {
// Called before the load is emitted, passing the current pc offset.
// NOTE(review): presumably this records an out-of-line trap for the memory
// access that follows; the helper is defined outside this hunk -- confirm.
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
// TODO(v8:9886): AVX codegen
// movsd with a memory source loads 64 bits into the low quadword of the
// output register (the high quadword is zeroed).
__ movsd(i.OutputSimd128Register(), i.MemoryOperand());
// punpcklqdq of the register with itself interleaves the two (identical) low
// quadwords, leaving the loaded value in both halves.
__ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
break;
}
case kX64I16x8Load8x8S: { case kX64I16x8Load8x8S: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand()); __ pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
......
...@@ -312,6 +312,8 @@ namespace compiler { ...@@ -312,6 +312,8 @@ namespace compiler {
V(X64S8x16Shuffle) \ V(X64S8x16Shuffle) \
V(X64S8x16LoadSplat) \ V(X64S8x16LoadSplat) \
V(X64S16x8LoadSplat) \ V(X64S16x8LoadSplat) \
V(X64S32x4LoadSplat) \
V(X64S64x2LoadSplat) \
V(X64I16x8Load8x8S) \ V(X64I16x8Load8x8S) \
V(X64I16x8Load8x8U) \ V(X64I16x8Load8x8U) \
V(X64S32x4Swizzle) \ V(X64S32x4Swizzle) \
......
...@@ -363,6 +363,8 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -363,6 +363,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Movdqu: case kX64Movdqu:
case kX64S8x16LoadSplat: case kX64S8x16LoadSplat:
case kX64S16x8LoadSplat: case kX64S16x8LoadSplat:
case kX64S32x4LoadSplat:
case kX64S64x2LoadSplat:
case kX64I16x8Load8x8S: case kX64I16x8Load8x8S:
case kX64I16x8Load8x8U: case kX64I16x8Load8x8U:
return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect; return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
......
...@@ -338,6 +338,12 @@ void InstructionSelector::VisitLoadTransform(Node* node) { ...@@ -338,6 +338,12 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
case LoadTransformation::kS16x8LoadSplat: case LoadTransformation::kS16x8LoadSplat:
opcode = kX64S16x8LoadSplat; opcode = kX64S16x8LoadSplat;
break; break;
case LoadTransformation::kS32x4LoadSplat:
opcode = kX64S32x4LoadSplat;
break;
case LoadTransformation::kS64x2LoadSplat:
opcode = kX64S64x2LoadSplat;
break;
case LoadTransformation::kI16x8Load8x8S: case LoadTransformation::kI16x8Load8x8S:
opcode = kX64I16x8Load8x8S; opcode = kX64I16x8Load8x8S;
break; break;
......
...@@ -54,6 +54,10 @@ std::ostream& operator<<(std::ostream& os, LoadTransformation rep) { ...@@ -54,6 +54,10 @@ std::ostream& operator<<(std::ostream& os, LoadTransformation rep) {
return os << "kS8x16LoadSplat"; return os << "kS8x16LoadSplat";
case LoadTransformation::kS16x8LoadSplat: case LoadTransformation::kS16x8LoadSplat:
return os << "kS16x8LoadSplat"; return os << "kS16x8LoadSplat";
case LoadTransformation::kS32x4LoadSplat:
return os << "kS32x4LoadSplat";
case LoadTransformation::kS64x2LoadSplat:
return os << "kS64x2LoadSplat";
case LoadTransformation::kI16x8Load8x8S: case LoadTransformation::kI16x8Load8x8S:
return os << "kI16x8Load8x8S"; return os << "kI16x8Load8x8S";
case LoadTransformation::kI16x8Load8x8U: case LoadTransformation::kI16x8Load8x8U:
...@@ -525,6 +529,8 @@ MachineType AtomicOpType(Operator const* op) { ...@@ -525,6 +529,8 @@ MachineType AtomicOpType(Operator const* op) {
#define LOAD_TRANSFORM_LIST(V) \ #define LOAD_TRANSFORM_LIST(V) \
V(S8x16LoadSplat) \ V(S8x16LoadSplat) \
V(S16x8LoadSplat) \ V(S16x8LoadSplat) \
V(S32x4LoadSplat) \
V(S64x2LoadSplat) \
V(I16x8Load8x8S) \ V(I16x8Load8x8S) \
V(I16x8Load8x8U) V(I16x8Load8x8U)
......
...@@ -63,6 +63,8 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, LoadKind); ...@@ -63,6 +63,8 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, LoadKind);
enum class LoadTransformation { enum class LoadTransformation {
kS8x16LoadSplat, kS8x16LoadSplat,
kS16x8LoadSplat, kS16x8LoadSplat,
kS32x4LoadSplat,
kS64x2LoadSplat,
kI16x8Load8x8S, kI16x8Load8x8S,
kI16x8Load8x8U, kI16x8Load8x8U,
}; };
......
...@@ -3685,6 +3685,10 @@ LoadTransformation GetLoadTransformation( ...@@ -3685,6 +3685,10 @@ LoadTransformation GetLoadTransformation(
return LoadTransformation::kS8x16LoadSplat; return LoadTransformation::kS8x16LoadSplat;
} else if (memtype == MachineType::Int16()) { } else if (memtype == MachineType::Int16()) {
return LoadTransformation::kS16x8LoadSplat; return LoadTransformation::kS16x8LoadSplat;
} else if (memtype == MachineType::Int32()) {
return LoadTransformation::kS32x4LoadSplat;
} else if (memtype == MachineType::Int64()) {
return LoadTransformation::kS64x2LoadSplat;
} }
break; break;
} }
......
...@@ -2763,6 +2763,14 @@ class WasmFullDecoder : public WasmDecoder<validate> { ...@@ -2763,6 +2763,14 @@ class WasmFullDecoder : public WasmDecoder<validate> {
len = DecodeLoadTransformMem(LoadType::kI32Load16S, len = DecodeLoadTransformMem(LoadType::kI32Load16S,
LoadTransformationKind::kSplat); LoadTransformationKind::kSplat);
break; break;
case kExprS32x4LoadSplat:
len = DecodeLoadTransformMem(LoadType::kI32Load,
LoadTransformationKind::kSplat);
break;
case kExprS64x2LoadSplat:
len = DecodeLoadTransformMem(LoadType::kI64Load,
LoadTransformationKind::kSplat);
break;
case kExprI16x8Load8x8S: case kExprI16x8Load8x8S:
len = DecodeLoadTransformMem(LoadType::kI32Load8S, len = DecodeLoadTransformMem(LoadType::kI32Load8S,
LoadTransformationKind::kExtend); LoadTransformationKind::kExtend);
......
...@@ -31,6 +31,7 @@ namespace wasm { ...@@ -31,6 +31,7 @@ namespace wasm {
#define CASE_I16x8_OP(name, str) CASE_OP(I16x8##name, "i16x8." str) #define CASE_I16x8_OP(name, str) CASE_OP(I16x8##name, "i16x8." str)
#define CASE_I8x16_OP(name, str) CASE_OP(I8x16##name, "i8x16." str) #define CASE_I8x16_OP(name, str) CASE_OP(I8x16##name, "i8x16." str)
#define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str) #define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
#define CASE_S64x2_OP(name, str) CASE_OP(S64x2##name, "s64x2." str)
#define CASE_S32x4_OP(name, str) CASE_OP(S32x4##name, "s32x4." str) #define CASE_S32x4_OP(name, str) CASE_OP(S32x4##name, "s32x4." str)
#define CASE_S16x8_OP(name, str) CASE_OP(S16x8##name, "s16x8." str) #define CASE_S16x8_OP(name, str) CASE_OP(S16x8##name, "s16x8." str)
#define CASE_S8x16_OP(name, str) CASE_OP(S8x16##name, "s8x16." str) #define CASE_S8x16_OP(name, str) CASE_OP(S8x16##name, "s8x16." str)
...@@ -324,6 +325,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -324,6 +325,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S8x16_OP(LoadSplat, "load_splat") CASE_S8x16_OP(LoadSplat, "load_splat")
CASE_S16x8_OP(LoadSplat, "load_splat") CASE_S16x8_OP(LoadSplat, "load_splat")
CASE_S32x4_OP(LoadSplat, "load_splat")
CASE_S64x2_OP(LoadSplat, "load_splat")
CASE_I16x8_OP(Load8x8S, "load8x8_s") CASE_I16x8_OP(Load8x8S, "load8x8_s")
CASE_I16x8_OP(Load8x8U, "load8x8_u") CASE_I16x8_OP(Load8x8U, "load8x8_u")
...@@ -359,6 +362,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -359,6 +362,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
#undef CASE_I16x8_OP #undef CASE_I16x8_OP
#undef CASE_I8x16_OP #undef CASE_I8x16_OP
#undef CASE_S128_OP #undef CASE_S128_OP
#undef CASE_S64x2_OP
#undef CASE_S32x4_OP #undef CASE_S32x4_OP
#undef CASE_S16x8_OP #undef CASE_S16x8_OP
#undef CASE_S8x16_OP #undef CASE_S8x16_OP
......
...@@ -425,6 +425,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&); ...@@ -425,6 +425,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F64x2UConvertI64x2, 0xfdb2, s_s) \ V(F64x2UConvertI64x2, 0xfdb2, s_s) \
V(S8x16LoadSplat, 0xfdc2, s_i) \ V(S8x16LoadSplat, 0xfdc2, s_i) \
V(S16x8LoadSplat, 0xfdc3, s_i) \ V(S16x8LoadSplat, 0xfdc3, s_i) \
V(S32x4LoadSplat, 0xfdc4, s_i) \
V(S64x2LoadSplat, 0xfdc5, s_i) \
V(I8x16SConvertI16x8, 0xfdc6, s_ss) \ V(I8x16SConvertI16x8, 0xfdc6, s_ss) \
V(I8x16UConvertI16x8, 0xfdc7, s_ss) \ V(I8x16UConvertI16x8, 0xfdc7, s_ss) \
V(I16x8SConvertI32x4, 0xfdc8, s_ss) \ V(I16x8SConvertI32x4, 0xfdc8, s_ss) \
......
...@@ -335,6 +335,16 @@ constexpr Vector<const int16_t> ValueHelper::GetVector() { ...@@ -335,6 +335,16 @@ constexpr Vector<const int16_t> ValueHelper::GetVector() {
return int16_vector(); return int16_vector();
} }
// Explicit specialization of ValueHelper::GetVector() whose result type is
// Vector<const int32_t>; it forwards to int32_vector().
template <>
constexpr Vector<const int32_t> ValueHelper::GetVector() {
return int32_vector();
}
// Explicit specialization of ValueHelper::GetVector() whose result type is
// Vector<const int64_t>; it forwards to int64_vector().
template <>
constexpr Vector<const int64_t> ValueHelper::GetVector() {
return int64_vector();
}
// Helper macros that can be used in FOR_INT32_INPUTS(i) { ... i ... } // Helper macros that can be used in FOR_INT32_INPUTS(i) { ... i ... }
#define FOR_INPUTS(ctype, itype, var) \ #define FOR_INPUTS(ctype, itype, var) \
for (ctype var : ::v8::internal::compiler::ValueHelper::itype##_vector()) for (ctype var : ::v8::internal::compiler::ValueHelper::itype##_vector())
......
...@@ -3287,6 +3287,14 @@ WASM_SIMD_TEST_NO_LOWERING(S16x8LoadSplat) { ...@@ -3287,6 +3287,14 @@ WASM_SIMD_TEST_NO_LOWERING(S16x8LoadSplat) {
RunLoadSplatTest<int16_t>(execution_tier, lower_simd, kExprS16x8LoadSplat); RunLoadSplatTest<int16_t>(execution_tier, lower_simd, kExprS16x8LoadSplat);
} }
// Test for kExprS32x4LoadSplat: delegates to RunLoadSplatTest instantiated
// with int32_t, forwarding the execution_tier and lower_simd values.
// NOTE(review): the test macro and RunLoadSplatTest are defined outside this
// hunk; presumably the macro skips the SIMD-lowering configuration -- confirm.
WASM_SIMD_TEST_NO_LOWERING(S32x4LoadSplat) {
RunLoadSplatTest<int32_t>(execution_tier, lower_simd, kExprS32x4LoadSplat);
}
// Test for kExprS64x2LoadSplat: delegates to RunLoadSplatTest instantiated
// with int64_t, forwarding the execution_tier and lower_simd values.
// NOTE(review): the test macro and RunLoadSplatTest are defined outside this
// hunk; presumably the macro skips the SIMD-lowering configuration -- confirm.
WASM_SIMD_TEST_NO_LOWERING(S64x2LoadSplat) {
RunLoadSplatTest<int64_t>(execution_tier, lower_simd, kExprS64x2LoadSplat);
}
template <typename S, typename T> template <typename S, typename T>
void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunLoadExtendTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode op) { WasmOpcode op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment