Commit 428c2a38 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement f32x4.sqrt for ia32

Bug: v8:8460
Change-Id: I8e72aa194cfc9797f0451d54638b6ba152d32971
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1797269
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63814}
parent 71ebd28d
......@@ -2234,6 +2234,13 @@ void Assembler::rcpps(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
// Emits the SSE `sqrtps dst, src` instruction: the two opcode bytes 0x0F 0x51
// followed by the operand encoding produced by emit_sse_operand().
void Assembler::sqrtps(XMMRegister dst, Operand src) {
// NOTE(review): EnsureSpace presumably reserves room in the code buffer before
// any bytes are written - confirm against its definition.
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0x51);
emit_sse_operand(dst, src);
}
void Assembler::rsqrtps(XMMRegister dst, Operand src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
......
......@@ -875,6 +875,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); }
void rcpps(XMMRegister dst, Operand src);
void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); }
// sqrtps: the Operand form is only declared here; the register-register form
// wraps `src` in an Operand and forwards to it.
void sqrtps(XMMRegister dst, Operand src);
void sqrtps(XMMRegister dst, XMMRegister src) { sqrtps(dst, Operand(src)); }
void rsqrtps(XMMRegister dst, Operand src);
void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); }
void haddps(XMMRegister dst, Operand src);
......@@ -1299,6 +1301,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vrcpps(XMMRegister dst, Operand src) {
vinstr(0x53, dst, xmm0, src, kNone, k0F, kWIG);
}
// vsqrtps, register-register form: wraps `src` in an Operand and forwards to
// the Operand overload below.
void vsqrtps(XMMRegister dst, XMMRegister src) { vsqrtps(dst, Operand(src)); }
// vsqrtps, Operand form: encoded through vinstr with opcode 0x51 and the
// kNone / k0F / kWIG selectors.
// NOTE(review): xmm0 is passed as the middle register argument, presumably as
// a placeholder because this unary instruction has no second source register -
// confirm against vinstr.
void vsqrtps(XMMRegister dst, Operand src) {
vinstr(0x51, dst, xmm0, src, kNone, k0F, kWIG);
}
void vrsqrtps(XMMRegister dst, XMMRegister src) {
vrsqrtps(dst, Operand(src));
}
......
......@@ -1951,6 +1951,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(0));
break;
}
// F32x4Sqrt, SSE variant: emits sqrtps from SIMD input register 0 into the
// output SIMD register.
case kSSEF32x4Sqrt: {
__ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
// F32x4Sqrt, AVX variant: same operands, emitted as vsqrtps while a
// CpuFeatureScope for AVX is active on the assembler.
case kAVXF32x4Sqrt: {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32F32x4RecipApprox: {
__ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
break;
......
......@@ -129,6 +129,8 @@ namespace compiler {
V(AVXF32x4Abs) \
V(SSEF32x4Neg) \
V(AVXF32x4Neg) \
V(SSEF32x4Sqrt) \
V(AVXF32x4Sqrt) \
V(IA32F32x4RecipApprox) \
V(IA32F32x4RecipSqrtApprox) \
V(SSEF32x4Add) \
......
......@@ -110,6 +110,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Abs:
case kSSEF32x4Neg:
case kAVXF32x4Neg:
case kSSEF32x4Sqrt:
case kAVXF32x4Sqrt:
case kIA32F32x4RecipApprox:
case kIA32F32x4RecipSqrtApprox:
case kSSEF32x4Add:
......
......@@ -1971,6 +1971,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_UNOP_PREFIX_LIST(V) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4Sqrt) \
V(S128Not)
#define SIMD_ANYTRUE_LIST(V) \
......
......@@ -2645,7 +2645,9 @@ void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); }
// The UNIMPLEMENTED stub for F32x4Sqrt is compiled out on IA32.
// NOTE(review): presumably because the IA32 instruction selector now provides
// its own VisitF32x4Sqrt - confirm against the IA32 selector source.
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_IA32
#endif // !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitI64x2MinS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxS(Node* node) { UNIMPLEMENTED(); }
......
......@@ -1057,6 +1057,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
// Opcode 0x51: print "vsqrtps <reg>," and then the right-hand XMM operand,
// advancing `current` by the value PrintRightXMMOperand returns.
case 0x51:
AppendToBuffer("vsqrtps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x52:
AppendToBuffer("vrsqrtps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
......@@ -1763,17 +1767,17 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("ucomiss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (f0byte >= 0x52 && f0byte <= 0x5F) {
} else if (f0byte >= 0x51 && f0byte <= 0x5F) {
const char* const pseudo_op[] = {
"rsqrtps", "rcpps", "andps", "andnps", "orps",
"xorps", "addps", "mulps", "cvtps2pd", "cvtdq2ps",
"subps", "minps", "divps", "maxps",
"sqrtps", "rsqrtps", "rcpps", "andps", "andnps",
"orps", "xorps", "addps", "mulps", "cvtps2pd",
"cvtdq2ps", "subps", "minps", "divps", "maxps",
};
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("%s %s,", pseudo_op[f0byte - 0x52],
AppendToBuffer("%s %s,", pseudo_op[f0byte - 0x51],
NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (f0byte == 0x50) {
......
......@@ -435,6 +435,8 @@ TEST(DisasmIa320) {
__ maxps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ rcpps(xmm1, xmm0);
__ rcpps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ sqrtps(xmm1, xmm0);
__ sqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ rsqrtps(xmm1, xmm0);
__ rsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
......@@ -639,6 +641,8 @@ TEST(DisasmIa320) {
__ vmaxps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vrcpps(xmm1, xmm0);
__ vrcpps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vsqrtps(xmm1, xmm0);
__ vsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vrsqrtps(xmm1, xmm0);
__ vrsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmovaps(xmm0, xmm1);
......
......@@ -600,11 +600,11 @@ WASM_SIMD_TEST(F32x4Neg) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Neg, Negate);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
// Runs the shared F32x4 unary-op test helper for kExprF32x4Sqrt.
// NOTE(review): `Sqrt` is presumably the scalar reference function the helper
// compares each lane against - confirm against RunF32x4UnOpTest.
WASM_SIMD_TEST(F32x4Sqrt) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Sqrt, Sqrt);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4RecipApprox) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4RecipApprox,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment