Commit 428c2a38 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement f32x4.sqrt for ia32

Bug: v8:8460
Change-Id: I8e72aa194cfc9797f0451d54638b6ba152d32971
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1797269
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63814}
parent 71ebd28d
...@@ -2234,6 +2234,13 @@ void Assembler::rcpps(XMMRegister dst, Operand src) { ...@@ -2234,6 +2234,13 @@ void Assembler::rcpps(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src); emit_sse_operand(dst, src);
} }
// Emits the SSE `sqrtps dst, src` instruction: the opcode bytes 0x0F 0x51,
// followed by whatever emit_sse_operand() writes for the (dst, src) pair.
void Assembler::sqrtps(XMMRegister dst, Operand src) {
// NOTE(review): presumably guarantees room in the code buffer before the
// EMITs below — confirm against EnsureSpace.
EnsureSpace ensure_space(this);
// Two-byte opcode; the order of these EMITs is the instruction encoding.
EMIT(0x0F);
EMIT(0x51);
emit_sse_operand(dst, src);
}
void Assembler::rsqrtps(XMMRegister dst, Operand src) { void Assembler::rsqrtps(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
EMIT(0x0F); EMIT(0x0F);
......
...@@ -875,6 +875,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -875,6 +875,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); } void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); }
void rcpps(XMMRegister dst, Operand src); void rcpps(XMMRegister dst, Operand src);
void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); } void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); }
// sqrtps taking an Operand source; only declared here.
void sqrtps(XMMRegister dst, Operand src);
// Register-source convenience overload: wraps `src` in an Operand and forwards
// to the Operand overload.
void sqrtps(XMMRegister dst, XMMRegister src) { sqrtps(dst, Operand(src)); }
void rsqrtps(XMMRegister dst, Operand src); void rsqrtps(XMMRegister dst, Operand src);
void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); } void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); }
void haddps(XMMRegister dst, Operand src); void haddps(XMMRegister dst, Operand src);
...@@ -1299,6 +1301,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1299,6 +1301,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vrcpps(XMMRegister dst, Operand src) { void vrcpps(XMMRegister dst, Operand src) {
vinstr(0x53, dst, xmm0, src, kNone, k0F, kWIG); vinstr(0x53, dst, xmm0, src, kNone, k0F, kWIG);
} }
// vsqrtps, register-source overload: wraps `src` in an Operand and forwards to
// the Operand overload below.
void vsqrtps(XMMRegister dst, XMMRegister src) { vsqrtps(dst, Operand(src)); }
// vsqrtps, Operand-source overload: emitted through vinstr() with opcode 0x51
// and the kNone / k0F / kWIG encoding selectors.
// NOTE(review): xmm0 is passed in the second register slot; presumably a
// placeholder because this instruction has a single source — confirm against
// vinstr().
void vsqrtps(XMMRegister dst, Operand src) {
vinstr(0x51, dst, xmm0, src, kNone, k0F, kWIG);
}
void vrsqrtps(XMMRegister dst, XMMRegister src) { void vrsqrtps(XMMRegister dst, XMMRegister src) {
vrsqrtps(dst, Operand(src)); vrsqrtps(dst, Operand(src));
} }
......
...@@ -1951,6 +1951,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1951,6 +1951,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(0)); i.InputOperand(0));
break; break;
} }
case kSSEF32x4Sqrt: {
__ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kAVXF32x4Sqrt: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32F32x4RecipApprox: { case kIA32F32x4RecipApprox: {
__ Rcpps(i.OutputSimd128Register(), i.InputOperand(0)); __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
break; break;
......
...@@ -129,6 +129,8 @@ namespace compiler { ...@@ -129,6 +129,8 @@ namespace compiler {
V(AVXF32x4Abs) \ V(AVXF32x4Abs) \
V(SSEF32x4Neg) \ V(SSEF32x4Neg) \
V(AVXF32x4Neg) \ V(AVXF32x4Neg) \
V(SSEF32x4Sqrt) \
V(AVXF32x4Sqrt) \
V(IA32F32x4RecipApprox) \ V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \ V(IA32F32x4RecipSqrtApprox) \
V(SSEF32x4Add) \ V(SSEF32x4Add) \
......
...@@ -110,6 +110,8 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -110,6 +110,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Abs: case kAVXF32x4Abs:
case kSSEF32x4Neg: case kSSEF32x4Neg:
case kAVXF32x4Neg: case kAVXF32x4Neg:
case kSSEF32x4Sqrt:
case kAVXF32x4Sqrt:
case kIA32F32x4RecipApprox: case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox: case kIA32F32x4RecipSqrtApprox:
case kSSEF32x4Add: case kSSEF32x4Add:
......
...@@ -1971,6 +1971,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -1971,6 +1971,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_UNOP_PREFIX_LIST(V) \ #define SIMD_UNOP_PREFIX_LIST(V) \
V(F32x4Abs) \ V(F32x4Abs) \
V(F32x4Neg) \ V(F32x4Neg) \
V(F32x4Sqrt) \
V(S128Not) V(S128Not)
#define SIMD_ANYTRUE_LIST(V) \ #define SIMD_ANYTRUE_LIST(V) \
......
...@@ -2645,7 +2645,9 @@ void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); } ...@@ -2645,7 +2645,9 @@ void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_IA32
#endif // !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); }
......
...@@ -1057,6 +1057,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) { ...@@ -1057,6 +1057,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop)); AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current); current += PrintRightXMMOperand(current);
break; break;
case 0x51:
AppendToBuffer("vsqrtps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x52: case 0x52:
AppendToBuffer("vrsqrtps %s,", NameOfXMMRegister(regop)); AppendToBuffer("vrsqrtps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current); current += PrintRightXMMOperand(current);
...@@ -1763,17 +1767,17 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, ...@@ -1763,17 +1767,17 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm); get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("ucomiss %s,", NameOfXMMRegister(regop)); AppendToBuffer("ucomiss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data); data += PrintRightXMMOperand(data);
} else if (f0byte >= 0x52 && f0byte <= 0x5F) { } else if (f0byte >= 0x51 && f0byte <= 0x5F) {
const char* const pseudo_op[] = { const char* const pseudo_op[] = {
"rsqrtps", "rcpps", "andps", "andnps", "orps", "sqrtps", "rsqrtps", "rcpps", "andps", "andnps",
"xorps", "addps", "mulps", "cvtps2pd", "cvtdq2ps", "orps", "xorps", "addps", "mulps", "cvtps2pd",
"subps", "minps", "divps", "maxps", "cvtdq2ps", "subps", "minps", "divps", "maxps",
}; };
data += 2; data += 2;
int mod, regop, rm; int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm); get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("%s %s,", pseudo_op[f0byte - 0x52], AppendToBuffer("%s %s,", pseudo_op[f0byte - 0x51],
NameOfXMMRegister(regop)); NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data); data += PrintRightXMMOperand(data);
} else if (f0byte == 0x50) { } else if (f0byte == 0x50) {
......
...@@ -435,6 +435,8 @@ TEST(DisasmIa320) { ...@@ -435,6 +435,8 @@ TEST(DisasmIa320) {
__ maxps(xmm1, Operand(ebx, ecx, times_4, 10000)); __ maxps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ rcpps(xmm1, xmm0); __ rcpps(xmm1, xmm0);
__ rcpps(xmm1, Operand(ebx, ecx, times_4, 10000)); __ rcpps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ sqrtps(xmm1, xmm0);
__ sqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ rsqrtps(xmm1, xmm0); __ rsqrtps(xmm1, xmm0);
__ rsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000)); __ rsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
...@@ -639,6 +641,8 @@ TEST(DisasmIa320) { ...@@ -639,6 +641,8 @@ TEST(DisasmIa320) {
__ vmaxps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000)); __ vmaxps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vrcpps(xmm1, xmm0); __ vrcpps(xmm1, xmm0);
__ vrcpps(xmm1, Operand(ebx, ecx, times_4, 10000)); __ vrcpps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vsqrtps(xmm1, xmm0);
__ vsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vrsqrtps(xmm1, xmm0); __ vrsqrtps(xmm1, xmm0);
__ vrsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000)); __ vrsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmovaps(xmm0, xmm1); __ vmovaps(xmm0, xmm1);
......
...@@ -600,11 +600,11 @@ WASM_SIMD_TEST(F32x4Neg) { ...@@ -600,11 +600,11 @@ WASM_SIMD_TEST(F32x4Neg) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Neg, Negate); RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Neg, Negate);
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4Sqrt) { WASM_SIMD_TEST(F32x4Sqrt) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Sqrt, Sqrt); RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Sqrt, Sqrt);
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4RecipApprox) { WASM_SIMD_TEST(F32x4RecipApprox) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox, RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment