Commit 4edb56af authored by Ng Zhi An, committed by Commit Bot

[wasm simd] Implement F64x2ExtractLane F64x2ReplaceLane for x64

Bug: v8:8460
Change-Id: Icd1d047c319450f73f1e728db0ca74fdd70b994d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1690709
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62723}
parent 889be09d
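The patch wires the two lane operations through the whole pipeline: the TurboFan opcode lists and instruction selector, the x64 code generator and scheduler, the machine-operator builder, the wasm graph builder, decoder, interpreter, opcode naming, and tests. For orientation, here is a minimal scalar model of the intended semantics (illustrative only, not code from the patch): an f64x2 value is two double lanes, extract_lane reads one lane as a scalar f64, and replace_lane returns the vector with one lane overwritten.

// Illustrative scalar model of f64x2.extract_lane / f64x2.replace_lane.
// Names and types here are hypothetical, for explanation only.
#include <array>

using F64x2 = std::array<double, 2>;

double ExtractLane(const F64x2& v, int lane) {
  return v[lane];  // lane is 0 or 1; the decoder validates the immediate
}

F64x2 ReplaceLane(F64x2 v, int lane, double value) {
  v[lane] = value;  // the other lane is preserved
  return v;
}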
@@ -1813,6 +1813,10 @@ void InstructionSelector::VisitNode(Node* node) {
return VisitUnsafePointerAdd(node);
case IrOpcode::kF64x2Splat:
return MarkAsSimd128(node), VisitF64x2Splat(node);
case IrOpcode::kF64x2ExtractLane:
return MarkAsFloat64(node), VisitF64x2ExtractLane(node);
case IrOpcode::kF64x2ReplaceLane:
return MarkAsSimd128(node), VisitF64x2ReplaceLane(node);
case IrOpcode::kF32x4Splat:
return MarkAsSimd128(node), VisitF32x4Splat(node);
case IrOpcode::kF32x4ExtractLane:
@@ -2526,6 +2530,8 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
@@ -2253,6 +2253,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64F64x2ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputAt(2)->IsFPRegister()) {
__ movq(kScratchRegister, i.InputDoubleRegister(2));
__ pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
} else {
__ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
case kX64F64x2ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
__ movq(i.OutputDoubleRegister(), kScratchRegister);
break;
}
// TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
case kX64F32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
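The extract/replace cases above round-trip the double through kScratchRegister because SSE4.1 pextrq and pinsrq move a 64-bit lane between an XMM register and a general-purpose register (or memory), not directly between two XMM registers: extract does pextrq to the scratch GPR and then movq into the output double register, and replace does the reverse movq before pinsrq. Roughly the same dance written with C++ intrinsics, as a sketch for illustration only (lane 1 hard-coded, since the lane must be an immediate):

// Sketch with SSE4.1 intrinsics; compile with -msse4.1. Not V8 code.
#include <smmintrin.h>
#include <cstdint>
#include <cstring>

double ExtractLane1(__m128d v) {
  int64_t bits = _mm_extract_epi64(_mm_castpd_si128(v), 1);  // pextrq r64, xmm, 1
  double result;
  std::memcpy(&result, &bits, sizeof(result));               // movq xmm, r64
  return result;
}

__m128d ReplaceLane1(__m128d v, double x) {
  int64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));                      // movq r64, xmm
  return _mm_castsi128_pd(
      _mm_insert_epi64(_mm_castpd_si128(v), bits, 1));       // pinsrq xmm, r64, 1
}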
@@ -159,6 +159,8 @@ namespace compiler {
V(X64Peek) \
V(X64StackCheck) \
V(X64F64x2Splat) \
V(X64F64x2ExtractLane) \
V(X64F64x2ReplaceLane) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
@@ -125,6 +125,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Dec32:
case kX64Inc32:
case kX64F64x2Splat:
case kX64F64x2ExtractLane:
case kX64F64x2ReplaceLane:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
@@ -2585,6 +2585,7 @@ VISIT_ATOMIC_BINOP(Xor)
#undef VISIT_ATOMIC_BINOP
#define SIMD_TYPES(V) \
V(F64x2) \
V(F32x4) \
V(I64x2) \
V(I32x4) \
@@ -2722,7 +2723,6 @@ void InstructionSelector::VisitS128Zero(Node* node) {
g.Use(node->InputAt(0))); \
}
SIMD_TYPES(VISIT_SIMD_SPLAT)
VISIT_SIMD_SPLAT(F64x2)
#undef VISIT_SIMD_SPLAT
#define VISIT_SIMD_EXTRACT_LANE(Type) \
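Adding F64x2 to SIMD_TYPES is what makes the x64 selector handle the new opcodes: the VISIT_SIMD_EXTRACT_LANE and VISIT_SIMD_REPLACE_LANE macros are instantiated for every type in that list, which is presumably also why the separate VISIT_SIMD_SPLAT(F64x2) instantiation goes away. The generated extract-lane visitor should look roughly like the following; this is a reconstruction for illustration, since the macro body itself is outside this diff:

// Hedged reconstruction of the macro-generated visitor for F64x2ExtractLane.
void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
  X64OperandGenerator g(this);
  int32_t lane = OpParameter<int32_t>(node->op());  // lane is baked into the operator
  Emit(kX64F64x2ExtractLane, g.DefineAsRegister(node),
       g.UseRegister(node->InputAt(0)), g.UseImmediate(lane));
}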
@@ -451,6 +451,7 @@ MachineType AtomicOpType(Operator const* op) {
V(Exchange)
#define SIMD_LANE_OP_LIST(V) \
V(F64x2, 2) \
V(F32x4, 4) \
V(I64x2, 2) \
V(I32x4, 4) \
@@ -469,6 +469,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
// SIMD operators.
const Operator* F64x2Splat();
const Operator* F64x2ExtractLane(int32_t);
const Operator* F64x2ReplaceLane(int32_t);
const Operator* F32x4Splat();
const Operator* F32x4ExtractLane(int32_t);
const Operator* F32x4ReplaceLane(int32_t);
@@ -741,6 +741,8 @@
#define MACHINE_SIMD_OP_LIST(V) \
V(F64x2Splat) \
V(F64x2ExtractLane) \
V(F64x2ReplaceLane) \
V(F32x4Splat) \
V(F32x4ExtractLane) \
V(F32x4ReplaceLane) \
@@ -4360,6 +4360,12 @@ Node* WasmGraphBuilder::SimdLaneOp(wasm::WasmOpcode opcode, uint8_t lane,
Node* const* inputs) {
has_simd_ = true;
switch (opcode) {
case wasm::kExprF64x2ExtractLane:
return graph()->NewNode(mcgraph()->machine()->F64x2ExtractLane(lane),
inputs[0]);
case wasm::kExprF64x2ReplaceLane:
return graph()->NewNode(mcgraph()->machine()->F64x2ReplaceLane(lane),
inputs[0], inputs[1]);
case wasm::kExprF32x4ExtractLane:
return graph()->NewNode(mcgraph()->machine()->F32x4ExtractLane(lane),
inputs[0]);
@@ -1050,6 +1050,8 @@ class WasmDecoder : public Decoder {
SimdLaneImmediate<validate>& imm) {
uint8_t num_lanes = 0;
switch (opcode) {
case kExprF64x2ExtractLane:
case kExprF64x2ReplaceLane:
case kExprI64x2ExtractLane:
case kExprI64x2ReplaceLane:
num_lanes = 2;
@@ -2684,6 +2686,10 @@ class WasmFullDecoder : public WasmDecoder<validate> {
uint32_t DecodeSimdOpcode(WasmOpcode opcode) {
uint32_t len = 0;
switch (opcode) {
case kExprF64x2ExtractLane: {
len = SimdExtractLane(opcode, kWasmF64);
break;
}
case kExprF32x4ExtractLane: {
len = SimdExtractLane(opcode, kWasmF32);
break;
@@ -2698,6 +2704,10 @@ class WasmFullDecoder : public WasmDecoder<validate> {
len = SimdExtractLane(opcode, kWasmI32);
break;
}
case kExprF64x2ReplaceLane: {
len = SimdReplaceLane(opcode, kWasmF64);
break;
}
case kExprF32x4ReplaceLane: {
len = SimdReplaceLane(opcode, kWasmF32);
break;
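On the wire, each of these instructions is the 0xfd SIMD prefix, the opcode from the table further down (0xfd16 for f64x2.extract_lane, 0xfd17 for f64x2.replace_lane), and a single lane-index immediate byte, which is what SimdLaneImmediate reads here and what the test macros below emit via TO_BYTE(lane). A hand-assembled example, assuming the opcode fits in one LEB byte (it does for 0x16):

// Byte sequence for "f64x2.extract_lane 1" (illustrative).
#include <cstdint>

const uint8_t kF64x2ExtractLane1[] = {
    0xfd,  // SIMD prefix
    0x16,  // f64x2.extract_lane
    0x01,  // lane immediate
};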
@@ -2206,9 +2206,10 @@ class ThreadImpl {
Push(WasmValue(ss.val[LANE(imm.lane, ss)])); \
return true; \
}
EXTRACT_LANE_CASE(F64x2, f64x2)
EXTRACT_LANE_CASE(F32x4, f32x4)
EXTRACT_LANE_CASE(I64x2, i64x2)
EXTRACT_LANE_CASE(I32x4, i32x4)
EXTRACT_LANE_CASE(F32x4, f32x4)
EXTRACT_LANE_CASE(I16x8, i16x8)
EXTRACT_LANE_CASE(I8x16, i8x16)
#undef EXTRACT_LANE_CASE
@@ -2386,6 +2387,7 @@ class ThreadImpl {
Push(WasmValue(Simd128(s))); \
return true; \
}
REPLACE_LANE_CASE(F64x2, f64x2, float2, double)
REPLACE_LANE_CASE(F32x4, f32x4, float4, float)
REPLACE_LANE_CASE(I64x2, i64x2, int2, int64_t)
REPLACE_LANE_CASE(I32x4, i32x4, int4, int32_t)
@@ -248,6 +248,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_CONVERT_OP(Convert, I16x8, I8x16Low, "i32", "convert")
CASE_CONVERT_OP(Convert, I16x8, I8x16High, "i32", "convert")
CASE_CONVERT_OP(Convert, I8x16, I16x8, "i32", "convert")
CASE_F64x2_OP(ExtractLane, "extract_lane")
CASE_F64x2_OP(ReplaceLane, "replace_lane")
CASE_F32x4_OP(ExtractLane, "extract_lane")
CASE_F32x4_OP(ReplaceLane, "replace_lane")
CASE_I64x2_OP(ExtractLane, "extract_lane")
@@ -402,6 +402,7 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(I32x4ExtractLane, 0xfd0d, _) \
V(I64x2ExtractLane, 0xfd10, _) \
V(F32x4ExtractLane, 0xfd13, _) \
V(F64x2ExtractLane, 0xfd16, _) \
V(I8x16Shl, 0xfd54, _) \
V(I8x16ShrS, 0xfd55, _) \
V(I8x16ShrU, 0xfd56, _) \
......@@ -420,7 +421,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(I16x8ReplaceLane, 0xfd0b, _) \
V(I32x4ReplaceLane, 0xfd0e, _) \
V(I64x2ReplaceLane, 0xfd11, _) \
V(F32x4ReplaceLane, 0xfd14, _)
V(F32x4ReplaceLane, 0xfd14, _) \
V(F64x2ReplaceLane, 0xfd17, _)
#define FOREACH_SIMD_1_OPERAND_OPCODE(V) \
FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
@@ -309,11 +309,13 @@ void PrintWasmText(const WasmModule* module, const ModuleWireBytes& wire_bytes,
case kExprI32x4ExtractLane:
case kExprI64x2ExtractLane:
case kExprF32x4ExtractLane:
case kExprF64x2ExtractLane:
case kExprI8x16ReplaceLane:
case kExprI16x8ReplaceLane:
case kExprI32x4ReplaceLane:
case kExprI64x2ReplaceLane:
case kExprF32x4ReplaceLane: {
case kExprF32x4ReplaceLane:
case kExprF64x2ReplaceLane: {
SimdLaneImmediate<Decoder::kNoValidate> imm(&i, i.pc());
os << WasmOpcodes::OpcodeName(opcode) << ' ' << imm.lane;
break;
@@ -16,7 +16,7 @@ namespace internal {
namespace wasm {
#define FOREACH_SIMD_TYPE(V) \
V(double, float2, f64x4, 2) \
V(double, float2, f64x2, 2) \
V(float, float4, f32x4, 4) \
V(int64_t, int2, i64x2, 2) \
V(int32_t, int4, i32x4, 4) \
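The type-list hunk above also corrects the representation name for the two-lane double type from f64x4 to f64x2. The FOREACH_SIMD_TYPE rows read as V(ctype, struct type, field name, lane count), so the corrected row says an f64x2 value is stored as a float2 struct of two doubles, which is exactly what the interpreter's f64x2 extract/replace cases index into. Presumably the row expands to something of this shape; the actual expansion is outside this diff:

// Hedged reconstruction of what V(double, float2, f64x2, 2) produces.
struct float2 {
  double val[2];  // lane 0 and lane 1 of an f64x2 value
};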
@@ -286,7 +286,13 @@ T Sqrt(T a) {
#define WASM_SIMD_CONCAT_OP(op, bytes, x, y) \
x, y, WASM_SIMD_OP(op), TO_BYTE(bytes)
#define WASM_SIMD_SELECT(format, x, y, z) x, y, z, WASM_SIMD_OP(kExprS128Select)
#define WASM_SIMD_F64x2_SPLAT(x) WASM_SIMD_SPLAT(F64x2, x)
#define WASM_SIMD_F64x2_EXTRACT_LANE(lane, x) \
x, WASM_SIMD_OP(kExprF64x2ExtractLane), TO_BYTE(lane)
#define WASM_SIMD_F64x2_REPLACE_LANE(lane, x, y) \
x, y, WASM_SIMD_OP(kExprF64x2ReplaceLane), TO_BYTE(lane)
#define WASM_SIMD_F32x4_SPLAT(x) WASM_SIMD_SPLAT(F32x4, x)
#define WASM_SIMD_F32x4_EXTRACT_LANE(lane, x) \
x, WASM_SIMD_OP(kExprF32x4ExtractLane), TO_BYTE(lane)
@@ -379,30 +385,6 @@ bool IsExtreme(float x) {
(abs_x < kSmallFloatThreshold || abs_x > kLargeFloatThreshold);
}
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
double* g = r.builder().AddGlobal<double>(kWasmS128);
byte param1 = 0;
BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(param1))),
WASM_ONE);
FOR_FLOAT64_INPUTS(x) {
r.Call(x);
double expected = x;
for (int i = 0; i < 2; i++) {
double actual = ReadLittleEndianValue<double>(&g[i]);
if (std::isnan(expected)) {
CHECK(std::isnan(actual));
} else {
CHECK_EQ(actual, expected);
}
}
}
}
#endif // V8_TARGET_ARCH_X64
WASM_SIMD_TEST(F32x4Splat) {
WasmRunner<int32_t, float> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
@@ -715,6 +697,79 @@ WASM_SIMD_TEST(F32x4Le) {
}
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
double* g = r.builder().AddGlobal<double>(kWasmS128);
byte param1 = 0;
BUILD(r, WASM_SET_GLOBAL(0, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(param1))),
WASM_ONE);
FOR_FLOAT64_INPUTS(x) {
r.Call(x);
double expected = x;
for (int i = 0; i < 2; i++) {
double actual = ReadLittleEndianValue<double>(&g[i]);
if (std::isnan(expected)) {
CHECK(std::isnan(actual));
} else {
CHECK_EQ(actual, expected);
}
}
}
}
WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLaneWithI64x2) {
WasmRunner<int64_t> r(execution_tier, lower_simd);
BUILD(r, WASM_IF_ELSE_L(
WASM_F64_EQ(WASM_SIMD_F64x2_EXTRACT_LANE(
0, WASM_SIMD_I64x2_SPLAT(WASM_I64V(1e15))),
WASM_F64_REINTERPRET_I64(WASM_I64V(1e15))),
WASM_I64V(1), WASM_I64V(0)));
CHECK_EQ(1, r.Call());
}
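The F64x2ExtractLaneWithI64x2 test pins down that extracting an f64 lane from a vector produced by i64x2.splat is a pure bit-pattern move, not an int-to-float conversion: the extracted lane has to equal f64.reinterpret_i64 of the same 64-bit value. The scalar analogue of that check, for illustration only:

// Reinterpret, don't convert: the bit pattern of the integer 10^15 viewed as
// an IEEE double is a tiny value nowhere near 1.0e15, which is the point.
#include <cstdint>
#include <cstring>

double ReinterpretI64AsF64(int64_t bits) {
  double d;
  std::memcpy(&d, &bits, sizeof(d));  // like f64.reinterpret_i64
  return d;
}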
WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
WasmRunner<double, double> r(execution_tier, lower_simd);
byte param1 = 0;
byte temp1 = r.AllocateLocal(kWasmF64);
byte temp2 = r.AllocateLocal(kWasmS128);
BUILD(r,
WASM_SET_LOCAL(temp1,
WASM_SIMD_F64x2_EXTRACT_LANE(
0, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(param1)))),
WASM_SET_LOCAL(temp2, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(temp1))),
WASM_SIMD_F64x2_EXTRACT_LANE(1, WASM_GET_LOCAL(temp2)));
FOR_FLOAT64_INPUTS(x) {
double actual = r.Call(x);
double expected = x;
if (std::isnan(expected)) {
CHECK(std::isnan(actual));
} else {
CHECK_EQ(actual, expected);
}
}
}
WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
WasmRunner<int32_t> r(execution_tier, lower_simd);
// Set up a global to hold input/output vector.
double* g = r.builder().AddGlobal<double>(kWasmS128);
// Build function to replace each lane with its (FP) index.
byte temp1 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_SPLAT(WASM_F64(1e100))),
WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_REPLACE_LANE(
0, WASM_GET_LOCAL(temp1), WASM_F64(0.0f))),
WASM_SET_GLOBAL(0, WASM_SIMD_F64x2_REPLACE_LANE(
1, WASM_GET_LOCAL(temp1), WASM_F64(1.0f))),
WASM_ONE);
r.Call();
for (int i = 0; i < 2; i++) {
CHECK_EQ(static_cast<double>(i), ReadLittleEndianValue<double>(&g[i]));
}
}
WASM_SIMD_TEST_NO_LOWERING(I64x2Splat) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
@@ -2666,6 +2721,8 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8GtUMixed) {
#undef WASM_SIMD_CONCAT_OP
#undef WASM_SIMD_SELECT
#undef WASM_SIMD_F64x2_SPLAT
#undef WASM_SIMD_F64x2_EXTRACT_LANE
#undef WASM_SIMD_F64x2_REPLACE_LANE
#undef WASM_SIMD_F32x4_SPLAT
#undef WASM_SIMD_F32x4_EXTRACT_LANE
#undef WASM_SIMD_F32x4_REPLACE_LANE