Commit df54d511 authored by Ng Zhi An, committed by Commit Bot

[wasm simd] Implement I64x2Splat on x64

Bug: v8:8460
Change-Id: Id159c81cd2d25924be96e49c64073e154ef32e6a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1667867
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Auto-Submit: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62475}
parent 0c2cd565
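In short: this change wires up i64x2.splat end to end on x64 — a movq path for loading a 64-bit value into an XMM register in the assembler, a kX64I64x2Splat machine instruction with selection and scheduling entries, a machine operator, SIMD scalar lowering, interpreter support, opcode name/signature tables, and an x64-gated test. Semantically, i64x2.splat consumes one i64 and produces a v128 whose two 64-bit lanes both hold that value; a minimal sketch of the semantics (not V8 code):

    #include <cstdint>
    struct I64x2 { int64_t lanes[2]; };
    I64x2 Splat(int64_t x) { return {{x, x}}; }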
@@ -2883,6 +2883,18 @@ void Assembler::movd(Register dst, XMMRegister src) {
}

void Assembler::movq(XMMRegister dst, Register src) {
  // Mixing AVX and non-AVX is expensive, catch those cases
  DCHECK(!IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  emit(0x66);
  emit_rex_64(dst, src);
  emit(0x0F);
  emit(0x6E);
  emit_sse_operand(dst, src);
}

void Assembler::movq(XMMRegister dst, Operand src) {
  // Mixing AVX and non-AVX is expensive, catch those cases
  DCHECK(!IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  emit(0x66);
@@ -2893,6 +2905,7 @@ void Assembler::movq(XMMRegister dst, Register src) {
}

void Assembler::movq(Register dst, XMMRegister src) {
  // Mixing AVX and non-AVX is expensive, catch those cases
  DCHECK(!IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  emit(0x66);
@@ -2903,6 +2916,7 @@ void Assembler::movq(Register dst, XMMRegister src) {
}

void Assembler::movq(XMMRegister dst, XMMRegister src) {
  // Mixing AVX and non-AVX is expensive, catch those cases
  DCHECK(!IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  if (dst.low_bits() == 4) {
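The DCHECK(!IsEnabled(AVX)) guards added above assert that these SSE-encoded forms are not used while AVX is enabled, since mixing VEX and non-VEX encodings incurs transition penalties. The byte sequence emitted for the XMM-from-GP form (66, REX.W, 0F 6E) is MOVQ xmm, r/m64; as a worked example of my own (not part of the commit), movq xmm0, rax assembles to:

    66 48 0F 6E C0  // 66 = operand-size prefix, 48 = REX.W,
                    // 0F 6E = MOVD/MOVQ opcode, C0 = ModRM (reg=xmm0, rm=rax)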
@@ -969,6 +969,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  void movd(XMMRegister dst, Operand src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(XMMRegister dst, Operand src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);
@@ -1849,6 +1849,8 @@ void InstructionSelector::VisitNode(Node* node) {
      return MarkAsSimd128(node), VisitF32x4Lt(node);
    case IrOpcode::kF32x4Le:
      return MarkAsSimd128(node), VisitF32x4Le(node);
    case IrOpcode::kI64x2Splat:
      return MarkAsSimd128(node), VisitI64x2Splat(node);
    case IrOpcode::kI32x4Splat:
      return MarkAsSimd128(node), VisitI32x4Splat(node);
    case IrOpcode::kI32x4ExtractLane:
@@ -2492,6 +2494,10 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_PPC
        // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390

#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_X64

void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

void InstructionSelector::VisitParameter(Node* node) {
@@ -2400,6 +2400,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
      break;
    }
    case kX64I64x2Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputAt(0)->IsRegister()) {
        __ movq(dst, i.InputRegister(0));
      } else {
        __ movq(dst, i.InputOperand(0));
      }
      __ pshufd(dst, dst, 0x44);
      break;
    }
    case kX64I32x4Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputAt(0)->IsRegister()) {
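The sequence first moves the 64-bit input into the low quadword of dst (movq zeroes the upper quadword), then pshufd with immediate 0x44 (binary 01 00 01 00, selecting 32-bit lanes {0, 1, 0, 1}) copies the low quadword into both halves. An equivalent SSE2 intrinsics sketch (illustrative only, not V8 code):

    #include <emmintrin.h>
    #include <cstdint>

    __m128i I64x2SplatSketch(int64_t x) {
      __m128i v = _mm_cvtsi64_si128(x);   // movq: x -> lane 0, upper 64 bits zeroed
      return _mm_shuffle_epi32(v, 0x44);  // pshufd 0x44: dwords {0,1,0,1}
    }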
@@ -177,6 +177,7 @@ namespace compiler {
  V(X64F32x4Ne)           \
  V(X64F32x4Lt)           \
  V(X64F32x4Le)           \
  V(X64I64x2Splat)        \
  V(X64I32x4Splat)        \
  V(X64I32x4ExtractLane)  \
  V(X64I32x4ReplaceLane)  \
@@ -143,6 +143,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
    case kX64F32x4Ne:
    case kX64F32x4Lt:
    case kX64F32x4Le:
    case kX64I64x2Splat:
    case kX64I32x4Splat:
    case kX64I32x4ExtractLane:
    case kX64I32x4ReplaceLane:
@@ -2668,6 +2668,7 @@ void InstructionSelector::VisitS128Zero(Node* node) {
         g.Use(node->InputAt(0)));                           \
  }
SIMD_TYPES(VISIT_SIMD_SPLAT)
VISIT_SIMD_SPLAT(I64x2)
#undef VISIT_SIMD_SPLAT
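Based on the macro tail visible above, VISIT_SIMD_SPLAT(I64x2) expands to roughly the following visitor (reconstructed; details may differ from the actual macro):

    void InstructionSelector::VisitI64x2Splat(Node* node) {
      X64OperandGenerator g(this);
      Emit(kX64I64x2Splat, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
    }

Allowing g.Use on the input is what lets the code generator pick either the register or the operand form of movq.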
#define VISIT_SIMD_EXTRACT_LANE(Type) \
@@ -262,6 +262,7 @@ MachineType AtomicOpType(Operator const* op) {
  V(F32x4Ne, Operator::kCommutative, 2, 0, 1)   \
  V(F32x4Lt, Operator::kNoProperties, 2, 0, 1)  \
  V(F32x4Le, Operator::kNoProperties, 2, 0, 1)  \
  V(I64x2Splat, Operator::kNoProperties, 1, 0, 1)           \
  V(I32x4Splat, Operator::kNoProperties, 1, 0, 1)           \
  V(I32x4SConvertF32x4, Operator::kNoProperties, 1, 0, 1)   \
  V(I32x4SConvertI16x8Low, Operator::kNoProperties, 1, 0, 1) \
@@ -489,6 +489,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
  const Operator* F32x4Lt();
  const Operator* F32x4Le();
  const Operator* I64x2Splat();
  const Operator* I32x4Splat();
  const Operator* I32x4ExtractLane(int32_t);
  const Operator* I32x4ReplaceLane(int32_t);
@@ -745,6 +745,7 @@
  V(F32x4Le)           \
  V(F32x4Gt)           \
  V(F32x4Ge)           \
  V(I64x2Splat)        \
  V(I32x4Splat)        \
  V(I32x4ExtractLane)  \
  V(I32x4ReplaceLane)  \
@@ -16,6 +16,7 @@ namespace internal {
namespace compiler {
namespace {
static const int kNumLanes64 = 2;
static const int kNumLanes32 = 4;
static const int kNumLanes16 = 8;
static const int kNumLanes8 = 16;
@@ -76,6 +77,8 @@ void SimdScalarLowering::LowerGraph() {
  }
}

#define FOREACH_INT64X2_OPCODE(V) V(I64x2Splat)

#define FOREACH_INT32X4_OPCODE(V) \
  V(I32x4Splat)                   \
  V(I32x4ExtractLane)             \
@@ -208,6 +211,8 @@ void SimdScalarLowering::LowerGraph() {
MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
  switch (simdType) {
    case SimdType::kInt64x2:
      return MachineType::Int64();
    case SimdType::kFloat32x4:
      return MachineType::Float32();
    case SimdType::kInt32x4:
@@ -223,6 +228,10 @@ MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
void SimdScalarLowering::SetLoweredType(Node* node, Node* output) {
  switch (node->opcode()) {
#define CASE_STMT(name) case IrOpcode::k##name:
    FOREACH_INT64X2_OPCODE(CASE_STMT) {
      replacements_[node->id()].type = SimdType::kInt64x2;
      break;
    }
    FOREACH_INT32X4_OPCODE(CASE_STMT)
    case IrOpcode::kReturn:
    case IrOpcode::kParameter:
@@ -326,7 +335,9 @@ static int GetReturnCountAfterLoweringSimd128(
int SimdScalarLowering::NumLanes(SimdType type) {
  int num_lanes = 0;
-  if (type == SimdType::kFloat32x4 || type == SimdType::kInt32x4) {
+  if (type == SimdType::kInt64x2) {
+    num_lanes = kNumLanes64;
+  } else if (type == SimdType::kFloat32x4 || type == SimdType::kInt32x4) {
    num_lanes = kNumLanes32;
  } else if (type == SimdType::kInt16x8) {
    num_lanes = kNumLanes16;
@@ -1223,6 +1234,7 @@ void SimdScalarLowering::LowerNode(Node* node) {
      LowerUnaryOp(node, SimdType::kInt32x4, machine()->RoundUint32ToFloat32());
      break;
    }
    case IrOpcode::kI64x2Splat:
    case IrOpcode::kI32x4Splat:
    case IrOpcode::kF32x4Splat:
    case IrOpcode::kI16x8Splat:
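For a splat, scalar lowering simply replaces the one SIMD node with num_lanes copies of the scalar input — two int64 nodes for I64x2Splat. A sketch of the shared splat case, inferred from the surrounding lowering code (not the verbatim source):

    case IrOpcode::kI64x2Splat:
    case IrOpcode::kI32x4Splat: {
      // One replacement node per lane, each one the scalar input itself.
      Node** rep_node = zone()->NewArray<Node*>(num_lanes);
      for (int i = 0; i < num_lanes; ++i) rep_node[i] = node->InputAt(0);
      ReplaceNode(node, rep_node, num_lanes);
      break;
    }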
@@ -32,7 +32,13 @@ class SimdScalarLowering {
 private:
  enum class State : uint8_t { kUnvisited, kOnStack, kVisited };
-  enum class SimdType : uint8_t { kFloat32x4, kInt32x4, kInt16x8, kInt8x16 };
+  enum class SimdType : uint8_t {
+    kInt64x2,
+    kFloat32x4,
+    kInt32x4,
+    kInt16x8,
+    kInt8x16
+  };

#if defined(V8_TARGET_BIG_ENDIAN)
  static constexpr int kLaneOffsets[16] = {15, 14, 13, 12, 11, 10, 9, 8,
@@ -4055,6 +4055,8 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
    case wasm::kExprF32x4Ge:
      return graph()->NewNode(mcgraph()->machine()->F32x4Le(), inputs[1],
                              inputs[0]);
    case wasm::kExprI64x2Splat:
      return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
    case wasm::kExprI32x4Splat:
      return graph()->NewNode(mcgraph()->machine()->I32x4Splat(), inputs[0]);
    case wasm::kExprI32x4SConvertF32x4:
@@ -2134,6 +2134,7 @@ class ThreadImpl {
    Push(WasmValue(Simd128(s)));  \
    return true;                  \
  }
      SPLAT_CASE(I64x2, int2, int64_t, 2)
      SPLAT_CASE(I32x4, int4, int32_t, 4)
      SPLAT_CASE(F32x4, float4, float, 4)
      SPLAT_CASE(I16x8, int8, int32_t, 8)
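Given the macro tail shown above, SPLAT_CASE(I64x2, int2, int64_t, 2) expands to approximately the following interpreter case (reconstructed, not verbatim):

    case kExprI64x2Splat: {
      int64_t v = Pop().to<int64_t>();
      int2 s;
      for (int i = 0; i < 2; i++) s.val[i] = v;  // fill both 64-bit lanes
      Push(WasmValue(Simd128(s)));
      return true;
    }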
@@ -25,6 +25,7 @@ namespace wasm {
#define CASE_REF_OP(name, str) CASE_OP(Ref##name, "ref." str)
#define CASE_F32x4_OP(name, str) CASE_OP(F32x4##name, "f32x4." str)
#define CASE_I32x4_OP(name, str) CASE_OP(I32x4##name, "i32x4." str)
#define CASE_I64x2_OP(name, str) CASE_OP(I64x2##name, "i64x2." str)
#define CASE_I16x8_OP(name, str) CASE_OP(I16x8##name, "i16x8." str)
#define CASE_I8x16_OP(name, str) CASE_OP(I8x16##name, "i8x16." str)
#define CASE_S128_OP(name, str) CASE_OP(S128##name, "s128." str)
@@ -252,6 +253,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
    CASE_SIGN_OP(SIMDI, Ge, "ge")
    CASE_SIGN_OP(SIMDI, Shr, "shr")
    CASE_SIMDI_OP(Shl, "shl")
    CASE_I64x2_OP(Splat, "splat")
    CASE_I32x4_OP(AddHoriz, "add_horizontal")
    CASE_I16x8_OP(AddHoriz, "add_horizontal")
    CASE_SIGN_OP(I16x8, AddSaturate, "add_saturate")
@@ -272,6 +272,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
  V(I8x16Splat, 0xfd04, s_i) \
  V(I16x8Splat, 0xfd08, s_i) \
  V(I32x4Splat, 0xfd0c, s_i) \
  V(I64x2Splat, 0xfd0f, s_l) \
  V(F32x4Splat, 0xfd12, s_f) \
  V(I8x16Eq, 0xfd18, s_ss)   \
  V(I8x16Ne, 0xfd19, s_ss)   \
@@ -560,6 +561,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
  V(s_f, kWasmS128, kWasmF32)            \
  V(s_ss, kWasmS128, kWasmS128, kWasmS128) \
  V(s_i, kWasmS128, kWasmI32)            \
  V(s_l, kWasmS128, kWasmI64)            \
  V(s_si, kWasmS128, kWasmS128, kWasmI32) \
  V(i_s, kWasmI32, kWasmS128)            \
  V(v_is, kWasmStmt, kWasmI32, kWasmS128) \
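Per these tables, i64x2.splat is opcode 0xfd 0x0f with signature s_l, i.e. it takes a kWasmI64 and yields a kWasmS128. As an illustrative wire encoding (assuming the single-byte SIMD sub-opcode scheme these tables use), a function body that splats local 0 would contain:

    // local.get 0 ; i64x2.splat
    const uint8_t kBody[] = {0x20, 0x00, 0xfd, 0x0f};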
@@ -17,6 +17,7 @@ namespace wasm {
#define FOREACH_SIMD_TYPE(V) \
  V(float, float4, f32x4, 4) \
  V(int64_t, int2, i64x2, 2) \
  V(int32_t, int4, i32x4, 4) \
  V(int16_t, int8, i16x8, 8) \
  V(int8_t, int16, i8x16, 16)
@@ -289,6 +289,7 @@ T Sqrt(T a) {
#define WASM_SIMD_F32x4_REPLACE_LANE(lane, x, y) \
  x, y, WASM_SIMD_OP(kExprF32x4ReplaceLane), TO_BYTE(lane)
#define WASM_SIMD_I64x2_SPLAT(x) WASM_SIMD_SPLAT(I64x2, x)
#define WASM_SIMD_I32x4_SPLAT(x) WASM_SIMD_SPLAT(I32x4, x)
#define WASM_SIMD_I32x4_EXTRACT_LANE(lane, x) \
  x, WASM_SIMD_OP(kExprI32x4ExtractLane), TO_BYTE(lane)
@@ -680,6 +681,26 @@ WASM_SIMD_TEST(F32x4Le) {
  RunF32x4CompareOpTest(execution_tier, lower_simd, kExprF32x4Le, LessEqual);
}

#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST(I64x2Splat) {
  WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
  // Set up a global to hold output vector.
  int64_t* g = r.builder().AddGlobal<int64_t>(kWasmS128);
  byte param1 = 0;
  BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_I64x2_SPLAT(WASM_GET_LOCAL(param1))),
        WASM_ONE);

  FOR_INT64_INPUTS(x) {
    r.Call(x);
    int64_t expected = x;
    for (int i = 0; i < 2; i++) {
      int64_t actual = ReadLittleEndianValue<int64_t>(&g[i]);
      CHECK_EQ(actual, expected);
    }
  }
}
#endif  // V8_TARGET_ARCH_X64

WASM_SIMD_TEST(I32x4Splat) {
  WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
  // Set up a global to hold output vector.