Commit f9b2f665 authored by Ng Zhi An, committed by Commit Bot

[wasm simd] Implement F64x2 Add Sub Mul on x64

Bug: v8:8460
Change-Id: Ia9b2360c414abedfd9690e97b555c4e9b19fa1b4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1708451
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62922}
parent 1d4079b3
...@@ -1329,9 +1329,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1329,9 +1329,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
} }
AVX_SP_3(vsqrt, 0x51) AVX_SP_3(vsqrt, 0x51)
AVX_SP_3(vadd, 0x58) AVX_S_3(vadd, 0x58)
AVX_SP_3(vsub, 0x5c) AVX_S_3(vsub, 0x5c)
AVX_SP_3(vmul, 0x59) AVX_S_3(vmul, 0x59)
AVX_SP_3(vdiv, 0x5e) AVX_SP_3(vdiv, 0x5e)
AVX_SP_3(vmin, 0x5d) AVX_SP_3(vmin, 0x5d)
AVX_SP_3(vmax, 0x5f) AVX_SP_3(vmax, 0x5f)
......
...@@ -6,7 +6,10 @@ ...@@ -6,7 +6,10 @@
#define V8_CODEGEN_X64_SSE_INSTR_H_ #define V8_CODEGEN_X64_SSE_INSTR_H_
#define SSE2_INSTRUCTION_LIST(V) \ #define SSE2_INSTRUCTION_LIST(V) \
V(addpd, 66, 0F, 58) \
V(mulpd, 66, 0F, 59) \
V(cvtps2dq, 66, 0F, 5B) \ V(cvtps2dq, 66, 0F, 5B) \
V(subpd, 66, 0F, 5C) \
V(punpcklbw, 66, 0F, 60) \ V(punpcklbw, 66, 0F, 60) \
V(punpcklwd, 66, 0F, 61) \ V(punpcklwd, 66, 0F, 61) \
V(punpckldq, 66, 0F, 62) \ V(punpckldq, 66, 0F, 62) \
......
...@@ -1827,6 +1827,12 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -1827,6 +1827,12 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Abs(node); return MarkAsSimd128(node), VisitF64x2Abs(node);
case IrOpcode::kF64x2Neg: case IrOpcode::kF64x2Neg:
return MarkAsSimd128(node), VisitF64x2Neg(node); return MarkAsSimd128(node), VisitF64x2Neg(node);
case IrOpcode::kF64x2Add:
return MarkAsSimd128(node), VisitF64x2Add(node);
case IrOpcode::kF64x2Sub:
return MarkAsSimd128(node), VisitF64x2Sub(node);
case IrOpcode::kF64x2Mul:
return MarkAsSimd128(node), VisitF64x2Mul(node);
case IrOpcode::kF64x2Eq: case IrOpcode::kF64x2Eq:
return MarkAsSimd128(node), VisitF64x2Eq(node); return MarkAsSimd128(node), VisitF64x2Eq(node);
case IrOpcode::kF64x2Ne: case IrOpcode::kF64x2Ne:
...@@ -2558,6 +2564,9 @@ void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); } ...@@ -2558,6 +2564,9 @@ void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
......
...@@ -2284,6 +2284,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2284,6 +2284,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ movq(i.OutputDoubleRegister(), kScratchRegister); __ movq(i.OutputDoubleRegister(), kScratchRegister);
break; break;
} }
case kX64F64x2Add: {
ASSEMBLE_SSE_BINOP(addpd);
break;
}
case kX64F64x2Sub: {
ASSEMBLE_SSE_BINOP(subpd);
break;
}
case kX64F64x2Mul: {
ASSEMBLE_SSE_BINOP(mulpd);
break;
}
case kX64F64x2Eq: { case kX64F64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
......
...@@ -164,6 +164,9 @@ namespace compiler { ...@@ -164,6 +164,9 @@ namespace compiler {
V(X64F64x2ReplaceLane) \ V(X64F64x2ReplaceLane) \
V(X64F64x2Abs) \ V(X64F64x2Abs) \
V(X64F64x2Neg) \ V(X64F64x2Neg) \
V(X64F64x2Add) \
V(X64F64x2Sub) \
V(X64F64x2Mul) \
V(X64F64x2Eq) \ V(X64F64x2Eq) \
V(X64F64x2Ne) \ V(X64F64x2Ne) \
V(X64F64x2Lt) \ V(X64F64x2Lt) \
......
...@@ -129,6 +129,9 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -129,6 +129,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2ReplaceLane: case kX64F64x2ReplaceLane:
case kX64F64x2Abs: case kX64F64x2Abs:
case kX64F64x2Neg: case kX64F64x2Neg:
case kX64F64x2Add:
case kX64F64x2Sub:
case kX64F64x2Mul:
case kX64F64x2Eq: case kX64F64x2Eq:
case kX64F64x2Ne: case kX64F64x2Ne:
case kX64F64x2Lt: case kX64F64x2Lt:
......
...@@ -2586,6 +2586,9 @@ VISIT_ATOMIC_BINOP(Xor) ...@@ -2586,6 +2586,9 @@ VISIT_ATOMIC_BINOP(Xor)
V(I8x16) V(I8x16)
#define SIMD_BINOP_LIST(V) \ #define SIMD_BINOP_LIST(V) \
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Eq) \ V(F64x2Eq) \
V(F64x2Ne) \ V(F64x2Ne) \
V(F64x2Lt) \ V(F64x2Lt) \
......
...@@ -248,6 +248,9 @@ MachineType AtomicOpType(Operator const* op) { ...@@ -248,6 +248,9 @@ MachineType AtomicOpType(Operator const* op) {
V(F64x2Splat, Operator::kNoProperties, 1, 0, 1) \ V(F64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Abs, Operator::kNoProperties, 1, 0, 1) \ V(F64x2Abs, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Neg, Operator::kNoProperties, 1, 0, 1) \ V(F64x2Neg, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Add, Operator::kCommutative, 2, 0, 1) \
V(F64x2Sub, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Mul, Operator::kCommutative, 2, 0, 1) \
V(F64x2Eq, Operator::kCommutative, 2, 0, 1) \ V(F64x2Eq, Operator::kCommutative, 2, 0, 1) \
V(F64x2Ne, Operator::kCommutative, 2, 0, 1) \ V(F64x2Ne, Operator::kCommutative, 2, 0, 1) \
V(F64x2Lt, Operator::kNoProperties, 2, 0, 1) \ V(F64x2Lt, Operator::kNoProperties, 2, 0, 1) \
......
...@@ -471,6 +471,9 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -471,6 +471,9 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F64x2Splat(); const Operator* F64x2Splat();
const Operator* F64x2Abs(); const Operator* F64x2Abs();
const Operator* F64x2Neg(); const Operator* F64x2Neg();
const Operator* F64x2Add();
const Operator* F64x2Sub();
const Operator* F64x2Mul();
const Operator* F64x2ExtractLane(int32_t); const Operator* F64x2ExtractLane(int32_t);
const Operator* F64x2ReplaceLane(int32_t); const Operator* F64x2ReplaceLane(int32_t);
const Operator* F64x2Eq(); const Operator* F64x2Eq();
......
...@@ -747,6 +747,9 @@ ...@@ -747,6 +747,9 @@
V(F64x2ReplaceLane) \ V(F64x2ReplaceLane) \
V(F64x2Abs) \ V(F64x2Abs) \
V(F64x2Neg) \ V(F64x2Neg) \
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Eq) \ V(F64x2Eq) \
V(F64x2Ne) \ V(F64x2Ne) \
V(F64x2Lt) \ V(F64x2Lt) \
......
...@@ -3995,6 +3995,15 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) { ...@@ -3995,6 +3995,15 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return graph()->NewNode(mcgraph()->machine()->F64x2Abs(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->F64x2Abs(), inputs[0]);
case wasm::kExprF64x2Neg: case wasm::kExprF64x2Neg:
return graph()->NewNode(mcgraph()->machine()->F64x2Neg(), inputs[0]); return graph()->NewNode(mcgraph()->machine()->F64x2Neg(), inputs[0]);
case wasm::kExprF64x2Add:
return graph()->NewNode(mcgraph()->machine()->F64x2Add(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Sub:
return graph()->NewNode(mcgraph()->machine()->F64x2Sub(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Mul:
return graph()->NewNode(mcgraph()->machine()->F64x2Mul(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Eq: case wasm::kExprF64x2Eq:
return graph()->NewNode(mcgraph()->machine()->F64x2Eq(), inputs[0], return graph()->NewNode(mcgraph()->machine()->F64x2Eq(), inputs[0],
inputs[1]); inputs[1]);
......
...@@ -1851,8 +1851,14 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) { ...@@ -1851,8 +1851,14 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
mnemonic = "orpd"; mnemonic = "orpd";
} else if (opcode == 0x57) { } else if (opcode == 0x57) {
mnemonic = "xorpd"; mnemonic = "xorpd";
} else if (opcode == 0x58) {
mnemonic = "addpd";
} else if (opcode == 0x59) {
mnemonic = "mulpd";
} else if (opcode == 0x5B) { } else if (opcode == 0x5B) {
mnemonic = "cvtps2dq"; mnemonic = "cvtps2dq";
} else if (opcode == 0x5C) {
mnemonic = "subpd";
} else if (opcode == 0x60) { } else if (opcode == 0x60) {
mnemonic = "punpcklbw"; mnemonic = "punpcklbw";
} else if (opcode == 0x61) { } else if (opcode == 0x61) {
......
...@@ -2241,6 +2241,9 @@ class ThreadImpl { ...@@ -2241,6 +2241,9 @@ class ThreadImpl {
Push(WasmValue(Simd128(res))); \ Push(WasmValue(Simd128(res))); \
return true; \ return true; \
} }
BINOP_CASE(F64x2Add, f64x2, float2, 2, a + b)
BINOP_CASE(F64x2Sub, f64x2, float2, 2, a - b)
BINOP_CASE(F64x2Mul, f64x2, float2, 2, a * b)
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b) BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b) BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b) BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
......
...@@ -229,10 +229,13 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -229,10 +229,13 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F64x2_OP(Ne, "ne") CASE_F64x2_OP(Ne, "ne")
CASE_I64x2_OP(Ne, "ne") CASE_I64x2_OP(Ne, "ne")
CASE_SIMD_OP(Add, "add") CASE_SIMD_OP(Add, "add")
CASE_F64x2_OP(Add, "add")
CASE_I64x2_OP(Add, "add") CASE_I64x2_OP(Add, "add")
CASE_SIMD_OP(Sub, "sub") CASE_SIMD_OP(Sub, "sub")
CASE_F64x2_OP(Sub, "sub")
CASE_I64x2_OP(Sub, "sub") CASE_I64x2_OP(Sub, "sub")
CASE_SIMD_OP(Mul, "mul") CASE_SIMD_OP(Mul, "mul")
CASE_F64x2_OP(Mul, "mul")
CASE_I64x2_OP(Mul, "mul") CASE_I64x2_OP(Mul, "mul")
CASE_F64x2_OP(Splat, "splat") CASE_F64x2_OP(Splat, "splat")
CASE_F64x2_OP(Lt, "lt") CASE_F64x2_OP(Lt, "lt")
......
...@@ -387,6 +387,9 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature); ...@@ -387,6 +387,9 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, bool hasBigIntFeature);
V(F32x4Max, 0xfd9f, s_ss) \ V(F32x4Max, 0xfd9f, s_ss) \
V(F64x2Abs, 0xfda0, s_s) \ V(F64x2Abs, 0xfda0, s_s) \
V(F64x2Neg, 0xfda1, s_s) \ V(F64x2Neg, 0xfda1, s_s) \
V(F64x2Add, 0xfda5, s_ss) \
V(F64x2Sub, 0xfda6, s_ss) \
V(F64x2Mul, 0xfda7, s_ss) \
V(I32x4SConvertF32x4, 0xfdab, s_s) \ V(I32x4SConvertF32x4, 0xfdab, s_s) \
V(I32x4UConvertF32x4, 0xfdac, s_s) \ V(I32x4UConvertF32x4, 0xfdac, s_s) \
V(F32x4SConvertI32x4, 0xfdaf, s_s) \ V(F32x4SConvertI32x4, 0xfdaf, s_s) \
......
...@@ -20,6 +20,7 @@ namespace test_run_wasm_simd { ...@@ -20,6 +20,7 @@ namespace test_run_wasm_simd {
namespace { namespace {
using DoubleUnOp = double (*)(double); using DoubleUnOp = double (*)(double);
using DoubleBinOp = double (*)(double, double);
using DoubleCompareOp = int64_t (*)(double, double); using DoubleCompareOp = int64_t (*)(double, double);
using FloatUnOp = float (*)(float); using FloatUnOp = float (*)(float);
using FloatBinOp = float (*)(float, float); using FloatBinOp = float (*)(float, float);
...@@ -948,7 +949,6 @@ void RunF64x2UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, ...@@ -948,7 +949,6 @@ void RunF64x2UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
} }
} }
} }
#undef FOR_FLOAT64_NAN_INPUTS
WASM_SIMD_TEST_NO_LOWERING(F64x2Abs) { WASM_SIMD_TEST_NO_LOWERING(F64x2Abs) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Abs, std::abs); RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Abs, std::abs);
...@@ -958,6 +958,65 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Neg) { ...@@ -958,6 +958,65 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Neg) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Neg, Negate); RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Neg, Negate);
} }
// Runs the binary SIMD operation |opcode| on two F64x2 vectors, each built by
// splatting one of the two scalar double arguments, stores the result in an
// S128 global, and checks both result lanes against |expected_op| applied to
// the same scalars.
//
// The operation is exercised over all pairs produced by FOR_FLOAT64_INPUTS and
// then over all pairs of entries in double_nan_test_array. Operand pairs, and
// expected results, that PlatformCanRepresent rejects are skipped.
void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
                       WasmOpcode opcode, DoubleBinOp expected_op) {
  WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
  // Global to hold output.
  double* g = r.builder().AddGlobal<double>(kWasmS128);
  // Build fn to splat test values, perform binop, and write the result.
  byte value1 = 0, value2 = 1;
  byte temp1 = r.AllocateLocal(kWasmS128);
  byte temp2 = r.AllocateLocal(kWasmS128);
  BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(value1))),
        WASM_SET_LOCAL(temp2, WASM_SIMD_F64x2_SPLAT(WASM_GET_LOCAL(value2))),
        WASM_SET_GLOBAL(0, WASM_SIMD_BINOP(opcode, WASM_GET_LOCAL(temp1),
                                           WASM_GET_LOCAL(temp2))),
        WASM_ONE);

  FOR_FLOAT64_INPUTS(x) {
    if (!PlatformCanRepresent(x)) continue;
    FOR_FLOAT64_INPUTS(y) {
      // The second operand needs its own representability check; x has
      // already been validated by the enclosing loop.
      if (!PlatformCanRepresent(y)) continue;
      double expected = expected_op(x, y);
      if (!PlatformCanRepresent(expected)) continue;
      r.Call(x, y);
      // Both lanes were computed from the same splatted operands, so each one
      // must match the scalar reference result.
      for (int lane = 0; lane < 2; lane++) {
        double actual = ReadLittleEndianValue<double>(&g[lane]);
        CheckDoubleResult(x, y, expected, actual, true /* exact */);
      }
    }
  }

  FOR_FLOAT64_NAN_INPUTS(i) {
    double x = bit_cast<double>(double_nan_test_array[i]);
    if (!PlatformCanRepresent(x)) continue;
    FOR_FLOAT64_NAN_INPUTS(j) {
      double y = bit_cast<double>(double_nan_test_array[j]);
      // Apply the same operand filter to y as to x above.
      if (!PlatformCanRepresent(y)) continue;
      double expected = expected_op(x, y);
      if (!PlatformCanRepresent(expected)) continue;
      r.Call(x, y);
      // Use a distinct name for the lane index so it cannot be confused with
      // the NaN-table indices i and j.
      for (int lane = 0; lane < 2; lane++) {
        double actual = ReadLittleEndianValue<double>(&g[lane]);
        CheckDoubleResult(x, y, expected, actual, true /* exact */);
      }
    }
  }
}
// Exercises kExprF64x2Add through RunF64x2BinOpTest, using Add as the scalar
// reference operation. NOTE(review): Add is defined outside this excerpt;
// presumably it is plain scalar addition — confirm.
WASM_SIMD_TEST_NO_LOWERING(F64x2Add) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Add, Add);
}
// Exercises kExprF64x2Sub through RunF64x2BinOpTest, using Sub as the scalar
// reference operation. NOTE(review): Sub is defined outside this excerpt;
// presumably it is plain scalar subtraction — confirm.
WASM_SIMD_TEST_NO_LOWERING(F64x2Sub) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Sub, Sub);
}
// Exercises kExprF64x2Mul through RunF64x2BinOpTest, using Mul as the scalar
// reference operation. NOTE(review): Mul is defined outside this excerpt;
// presumably it is plain scalar multiplication — confirm.
WASM_SIMD_TEST_NO_LOWERING(F64x2Mul) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Mul, Mul);
}
#undef FOR_FLOAT64_NAN_INPUTS
WASM_SIMD_TEST_NO_LOWERING(I64x2Splat) { WASM_SIMD_TEST_NO_LOWERING(I64x2Splat) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd); WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
// Set up a global to hold output vector. // Set up a global to hold output vector.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment