Commit 8214bea6 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement f64x2 add sub mul div for ia32

Bug: v8:9728
Change-Id: Ie769ae0431b7924a4b8f8858681d57e92c00f4b3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1808400
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64118}
parent d05b2d3e
...
@@ -299,6 +299,28 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
#undef AVX_OP3_XO
#undef AVX_OP3_WITH_TYPE
// Only use this macro when dst and src1 are the same in the SSE case.
#define AVX_PACKED_OP3_WITH_TYPE(macro_name, name, dst_type, src_type) \
void macro_name(dst_type dst, dst_type src1, src_type src2) { \
if (CpuFeatures::IsSupported(AVX)) { \
CpuFeatureScope scope(this, AVX); \
v##name(dst, src1, src2); \
} else { \
DCHECK_EQ(dst, src1); \
name(dst, src2); \
} \
}
#define AVX_PACKED_OP3(macro_name, name) \
AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, XMMRegister) \
AVX_PACKED_OP3_WITH_TYPE(macro_name, name, XMMRegister, Operand)
AVX_PACKED_OP3(Addpd, addpd)
AVX_PACKED_OP3(Subpd, subpd)
AVX_PACKED_OP3(Mulpd, mulpd)
AVX_PACKED_OP3(Divpd, divpd)
#undef AVX_PACKED_OP3
#undef AVX_PACKED_OP3_WITH_TYPE
// Non-SSE2 instructions.
#define AVX_OP2_WITH_TYPE_SCOPE(macro_name, name, dst_type, src_type, \
                                sse_scope) \
...
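For reference, a sketch (not part of the patch) of what AVX_PACKED_OP3(Addpd, addpd) expands to for the XMMRegister/XMMRegister overload; the Operand overload differs only in the type of src2:

// Expansion sketch of AVX_PACKED_OP3(Addpd, addpd).
void Addpd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddpd(dst, src1, src2);  // AVX: non-destructive three-operand form.
  } else {
    DCHECK_EQ(dst, src1);  // SSE addpd is destructive; dst doubles as src1.
    addpd(dst, src2);
  }
}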
...
@@ -1904,6 +1904,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kIA32F64x2Add: {
__ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputOperand(1));
break;
}
case kIA32F64x2Sub: {
__ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputOperand(1));
break;
}
case kIA32F64x2Mul: {
__ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputOperand(1));
break;
}
case kIA32F64x2Div: {
__ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputOperand(1));
break;
}
case kSSEF32x4Splat: {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
XMMRegister dst = i.OutputSimd128Register();
...
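Each new case defers the AVX/SSE choice to the corresponding TurboAssembler helper defined above; semantically, every packed instruction applies the scalar double operation to both 64-bit lanes independently. A plain C++ sketch of the lanewise semantics, for illustration only:

#include <array>

// Reference semantics of f64x2.add: lanewise double addition.
std::array<double, 2> F64x2AddRef(const std::array<double, 2>& a,
                                  const std::array<double, 2>& b) {
  return {a[0] + b[0], a[1] + b[1]};
}

f64x2.sub, f64x2.mul, and f64x2.div are identical with -, *, and / in place of +.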
...
@@ -123,6 +123,10 @@ namespace compiler {
V(SSEF64x2ReplaceLane) \
V(AVXF64x2ReplaceLane) \
V(IA32F64x2Sqrt) \
V(IA32F64x2Add) \
V(IA32F64x2Sub) \
V(IA32F64x2Mul) \
V(IA32F64x2Div) \
V(SSEF32x4Splat) \
V(AVXF32x4Splat) \
V(SSEF32x4ExtractLane) \
...
...
@@ -104,6 +104,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEF64x2ReplaceLane:
case kAVXF64x2ReplaceLane:
case kIA32F64x2Sqrt:
case kIA32F64x2Add:
case kIA32F64x2Sub:
case kIA32F64x2Mul:
case kIA32F64x2Div:
case kSSEF32x4Splat:
case kAVXF32x4Splat:
case kSSEF32x4ExtractLane:
...
...
@@ -860,7 +860,11 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add) \
V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub) \
V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul) \
V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div)
#define FLOAT_UNOP_LIST(V) \
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
...
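Registering the four ops in FLOAT_BINOP_LIST routes them through the existing float binop selection path. A sketch of how such a list is typically consumed (the macro and helper names here are assumptions, not taken from the patch): the AVX path gets a freshly allocated output register, while the SSE path constrains the output to equal the first input, which is exactly what the DCHECK_EQ(dst, src1) in the TurboAssembler macro relies on.

// Hypothetical consumption sketch; names approximate the ia32 selector.
#define FLOAT_BINOP(Opcode, avx, sse)                   \
  void InstructionSelector::Visit##Opcode(Node* node) { \
    VisitRROFloat(this, node, avx, sse);                \
  }
FLOAT_BINOP_LIST(FLOAT_BINOP)
#undef FLOAT_BINOP

// VisitRROFloat (sketch): pick AVX vs. SSE and set register constraints.
void VisitRROFloat(InstructionSelector* selector, Node* node,
                   ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
  IA32OperandGenerator g(selector);
  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
  InstructionOperand operand1 = g.Use(node->InputAt(1));
  if (selector->IsSupported(AVX)) {
    selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
  } else {
    // SSE uses destructive two-operand forms, so dst must alias input 0.
    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
  }
}

Note that the new entries pass the same kIA32F64x2* opcode in both the AVX and SSE slots: unlike the scalar float ops, the AVX/SSE split happens at code-generation time inside Addpd/Subpd/Mulpd/Divpd rather than via separate instruction codes.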
...
@@ -2628,11 +2628,11 @@ void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
...
...
@@ -1289,7 +1289,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt);
}
void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleBinOp expected_op) {
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
...
@@ -1353,6 +1352,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleCompareOp expected_op) {
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
...
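Moving the #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 guard from above RunF64x2BinOpTest to above RunF64x2CompareOpTest enables the f64x2 binop tests on ia32 while keeping the comparison tests x64/arm64-only. The binop tests in the collapsed region presumably follow the same pattern as the F64x2Div test shown above, e.g.:

WASM_SIMD_TEST_NO_LOWERING(F64x2Add) {
  RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Add, Add);
}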