Commit 472aff97, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement f64x2 sqrt for ia32

Bug: v8:9728
Change-Id: Ic15d793e6408af1ea2e1f7f71b9130300d359a95
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1808417
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64073}
parent f61780c4
...@@ -882,6 +882,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -882,6 +882,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); } void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); }
void haddps(XMMRegister dst, Operand src); void haddps(XMMRegister dst, Operand src);
void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); } void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); }
// SQRTPD (non-VEX form): square root of the packed doubles in `src`, written
// to `dst`. Emitted through sse2_instr with prefix 0x66, escape 0x0F and
// opcode 0x51.
void sqrtpd(XMMRegister dst, Operand src) {
sse2_instr(dst, src, 0x66, 0x0F, 0x51);
}
// Register-source convenience overload: wraps `src` in an Operand and forwards
// to the overload above.
void sqrtpd(XMMRegister dst, XMMRegister src) { sqrtpd(dst, Operand(src)); }
void minps(XMMRegister dst, Operand src); void minps(XMMRegister dst, Operand src);
void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); } void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); }
...@@ -1318,6 +1322,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1318,6 +1322,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) { void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG); vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
} }
// VSQRTPD: VEX-encoded counterpart of sqrtpd.
// Register-source convenience overload: wraps `src` in an Operand and forwards
// to the Operand overload below.
void vsqrtpd(XMMRegister dst, XMMRegister src) { vsqrtpd(dst, Operand(src)); }
// Emits opcode 0x51 through vinstr with the k66 / k0F / kWIG selectors.
// xmm0 is passed in the first-source slot, as the vmovaps Operand overload
// also does.
// NOTE(review): presumably xmm0 is only a placeholder because this is a
// single-source instruction - confirm against vinstr's handling of that slot.
void vsqrtpd(XMMRegister dst, Operand src) {
vinstr(0x51, dst, xmm0, src, k66, k0F, kWIG);
}
void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); } void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); } void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); } void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
......
...@@ -258,6 +258,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -258,6 +258,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister) AVX_OP2_WITH_TYPE(Movd, movd, Register, XMMRegister)
AVX_OP2_WITH_TYPE(Movd, movd, Operand, XMMRegister) AVX_OP2_WITH_TYPE(Movd, movd, Operand, XMMRegister)
AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand) AVX_OP2_WITH_TYPE(Cvtdq2ps, cvtdq2ps, XMMRegister, Operand)
// Sqrtpd: macro-assembler wrapper for the packed-double square root. The
// source is taken as `Operand` by value, matching the neighbouring
// Movd/Cvtdq2ps entries and the by-value Operand parameters of the underlying
// assembler emitters.
AVX_OP2_WITH_TYPE(Sqrtpd, sqrtpd, XMMRegister, Operand)
#undef AVX_OP2_WITH_TYPE #undef AVX_OP2_WITH_TYPE
......
...@@ -1900,6 +1900,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1900,6 +1900,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
} }
case kIA32F64x2Sqrt: {
// f64x2 sqrt: a single Sqrtpd from input operand 0 into the output SIMD
// register. Input 0 is fetched with InputOperand rather than as a register.
__ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
break;
}
case kSSEF32x4Splat: { case kSSEF32x4Splat: {
DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
......
...@@ -122,6 +122,7 @@ namespace compiler { ...@@ -122,6 +122,7 @@ namespace compiler {
V(AVXF64x2ExtractLane) \ V(AVXF64x2ExtractLane) \
V(SSEF64x2ReplaceLane) \ V(SSEF64x2ReplaceLane) \
V(AVXF64x2ReplaceLane) \ V(AVXF64x2ReplaceLane) \
V(IA32F64x2Sqrt) \
V(SSEF32x4Splat) \ V(SSEF32x4Splat) \
V(AVXF32x4Splat) \ V(AVXF32x4Splat) \
V(SSEF32x4ExtractLane) \ V(SSEF32x4ExtractLane) \
......
...@@ -103,6 +103,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -103,6 +103,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF64x2ExtractLane: case kAVXF64x2ExtractLane:
case kSSEF64x2ReplaceLane: case kSSEF64x2ReplaceLane:
case kAVXF64x2ReplaceLane: case kAVXF64x2ReplaceLane:
case kIA32F64x2Sqrt:
case kSSEF32x4Splat: case kSSEF32x4Splat:
case kAVXF32x4Splat: case kAVXF32x4Splat:
case kSSEF32x4ExtractLane: case kSSEF32x4ExtractLane:
......
...@@ -830,7 +830,8 @@ void InstructionSelector::VisitWord32Ror(Node* node) { ...@@ -830,7 +830,8 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float64ExtractLowWord32, kSSEFloat64ExtractLowWord32) \ V(Float64ExtractLowWord32, kSSEFloat64ExtractLowWord32) \
V(Float64ExtractHighWord32, kSSEFloat64ExtractHighWord32) \ V(Float64ExtractHighWord32, kSSEFloat64ExtractHighWord32) \
V(SignExtendWord8ToInt32, kIA32Movsxbl) \ V(SignExtendWord8ToInt32, kIA32Movsxbl) \
V(SignExtendWord16ToInt32, kIA32Movsxwl) V(SignExtendWord16ToInt32, kIA32Movsxwl) \
V(F64x2Sqrt, kIA32F64x2Sqrt)
#define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kSSEUint32ToFloat64) #define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kSSEUint32ToFloat64)
......
...@@ -2627,8 +2627,8 @@ void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); } ...@@ -2627,8 +2627,8 @@ void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
......
...@@ -1285,11 +1285,11 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Neg) { ...@@ -1285,11 +1285,11 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Neg) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Neg, Negate); RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Neg, Negate);
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) { WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt); RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt);
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleBinOp expected_op) { WasmOpcode opcode, DoubleBinOp expected_op) {
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd); WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment