Commit 2e3df6d9 authored by Clemens Hammacher's avatar Clemens Hammacher Committed by Commit Bot

[Liftoff] Implement f32.sqrt and f64.sqrt

On ia32, support for vsqrtss and vsqrtsd was missing, so I add the
implementation of these instructions and disassembly support.
On x64, disassembly support for vsqrtss was missing, while vsqrtsd was
implemented. Now both are implemented.
The implementation of f32.sqrt and f64.sqrt is very straight-forward on
ia32 and x64, we can immediately emit the {v}sqrtss or {v}sqrtsd
instruction.

R=ahaas@chromium.org

Bug: v8:6600
Change-Id: Icf3ec05a97a23e94cdf70f4a72f30dd02fbddd13
Reviewed-on: https://chromium-review.googlesource.com/944221Reviewed-by: 's avatarAndreas Haas <ahaas@chromium.org>
Reviewed-by: 's avatarBenedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51724}
parent 73d6037c
......@@ -1345,6 +1345,12 @@ class Assembler : public AssemblerBase {
void vminsd(XMMRegister dst, XMMRegister src1, Operand src2) {
vsd(0x5d, dst, src1, src2);
}
void vsqrtsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vsqrtsd(dst, src1, Operand(src2));
}
void vsqrtsd(XMMRegister dst, XMMRegister src1, Operand src2) {
vsd(0x51, dst, src1, src2);
}
void vsd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
......@@ -1383,6 +1389,12 @@ class Assembler : public AssemblerBase {
void vminss(XMMRegister dst, XMMRegister src1, Operand src2) {
vss(0x5d, dst, src1, src2);
}
void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vsqrtss(dst, src1, Operand(src2));
}
void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) {
vss(0x51, dst, src1, src2);
}
void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
void vrcpps(XMMRegister dst, XMMRegister src) { vrcpps(dst, Operand(src)); }
......
......@@ -868,6 +868,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x51:
AppendToBuffer("vsqrtsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x58:
AppendToBuffer("vaddsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -916,6 +921,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x51:
AppendToBuffer("vsqrtss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x58:
AppendToBuffer("vaddss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......
......@@ -253,6 +253,8 @@ class TurboAssembler : public Assembler {
AVX_OP3_XO(Pxor, pxor)
AVX_OP3_XO(Xorps, xorps)
AVX_OP3_XO(Xorpd, xorpd)
AVX_OP3_XO(Sqrtss, sqrtss)
AVX_OP3_XO(Sqrtsd, sqrtsd)
#undef AVX_OP3_XO
#undef AVX_OP3_WITH_TYPE
......
......@@ -140,10 +140,12 @@ UNIMPLEMENTED_FP_BINOP(f32_add)
UNIMPLEMENTED_FP_BINOP(f32_sub)
UNIMPLEMENTED_FP_BINOP(f32_mul)
UNIMPLEMENTED_FP_UNOP(f32_neg)
UNIMPLEMENTED_FP_UNOP(f32_sqrt)
UNIMPLEMENTED_FP_BINOP(f64_add)
UNIMPLEMENTED_FP_BINOP(f64_sub)
UNIMPLEMENTED_FP_BINOP(f64_mul)
UNIMPLEMENTED_FP_UNOP(f64_neg)
UNIMPLEMENTED_FP_UNOP(f64_sqrt)
#undef UNIMPLEMENTED_GP_BINOP
#undef UNIMPLEMENTED_GP_UNOP
......
......@@ -140,10 +140,12 @@ UNIMPLEMENTED_FP_BINOP(f32_add)
UNIMPLEMENTED_FP_BINOP(f32_sub)
UNIMPLEMENTED_FP_BINOP(f32_mul)
UNIMPLEMENTED_FP_UNOP(f32_neg)
UNIMPLEMENTED_FP_UNOP(f32_sqrt)
UNIMPLEMENTED_FP_BINOP(f64_add)
UNIMPLEMENTED_FP_BINOP(f64_sub)
UNIMPLEMENTED_FP_BINOP(f64_mul)
UNIMPLEMENTED_FP_UNOP(f64_neg)
UNIMPLEMENTED_FP_UNOP(f64_sqrt)
#undef UNIMPLEMENTED_GP_BINOP
#undef UNIMPLEMENTED_GP_UNOP
......
......@@ -576,6 +576,10 @@ void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
}
}
void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
Sqrtss(dst, src);
}
void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
DoubleRegister rhs) {
if (CpuFeatures::IsSupported(AVX)) {
......@@ -628,6 +632,10 @@ void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
}
}
void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
Sqrtsd(dst, src);
}
void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }
void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
......
......@@ -386,6 +386,7 @@ class LiftoffAssembler : public TurboAssembler {
DoubleRegister rhs);
// f32 unops.
inline void emit_f32_neg(DoubleRegister dst, DoubleRegister src);
inline void emit_f32_sqrt(DoubleRegister dst, DoubleRegister src);
// f64 binops.
inline void emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
......@@ -397,6 +398,7 @@ class LiftoffAssembler : public TurboAssembler {
// f64 unops.
inline void emit_f64_neg(DoubleRegister dst, DoubleRegister src);
inline void emit_f64_sqrt(DoubleRegister dst, DoubleRegister src);
inline void emit_jump(Label*);
inline void emit_cond_jump(Condition, Label*, ValueType value, Register lhs,
......
......@@ -561,7 +561,9 @@ class LiftoffCompiler {
});
break;
CASE_FLOAT_UNOP(F32Neg, F32, f32_neg)
CASE_FLOAT_UNOP(F32Sqrt, F32, f32_sqrt)
CASE_FLOAT_UNOP(F64Neg, F64, f64_neg)
CASE_FLOAT_UNOP(F64Sqrt, F64, f64_sqrt)
default:
return unsupported(decoder, WasmOpcodes::OpcodeName(opcode));
}
......
......@@ -402,10 +402,12 @@ FP_BINOP(f32_add, add_s)
FP_BINOP(f32_sub, sub_s)
FP_BINOP(f32_mul, mul_s)
UNIMPLEMENTED_FP_UNOP(f32_neg)
UNIMPLEMENTED_FP_UNOP(f32_sqrt)
FP_BINOP(f64_add, add_d)
FP_BINOP(f64_sub, sub_d)
FP_BINOP(f64_mul, mul_d)
UNIMPLEMENTED_FP_UNOP(f64_neg)
UNIMPLEMENTED_FP_UNOP(f64_sqrt)
#undef FP_BINOP
#undef UNIMPLEMENTED_FP_BINOP
......
......@@ -347,10 +347,12 @@ FP_BINOP(f32_add, add_s)
FP_BINOP(f32_sub, sub_s)
FP_BINOP(f32_mul, mul_s)
UNIMPLEMENTED_FP_UNOP(f32_neg)
UNIMPLEMENTED_FP_UNOP(f32_sqrt)
FP_BINOP(f64_add, add_d)
FP_BINOP(f64_sub, sub_d)
FP_BINOP(f64_mul, mul_d)
UNIMPLEMENTED_FP_UNOP(f64_neg)
UNIMPLEMENTED_FP_UNOP(f64_sqrt)
#undef FP_BINOP
#undef UNIMPLEMENTED_FP_BINOP
......
......@@ -140,10 +140,12 @@ UNIMPLEMENTED_FP_BINOP(f32_add)
UNIMPLEMENTED_FP_BINOP(f32_sub)
UNIMPLEMENTED_FP_BINOP(f32_mul)
UNIMPLEMENTED_FP_UNOP(f32_neg)
UNIMPLEMENTED_FP_UNOP(f32_sqrt)
UNIMPLEMENTED_FP_BINOP(f64_add)
UNIMPLEMENTED_FP_BINOP(f64_sub)
UNIMPLEMENTED_FP_BINOP(f64_mul)
UNIMPLEMENTED_FP_UNOP(f64_neg)
UNIMPLEMENTED_FP_UNOP(f64_sqrt)
#undef UNIMPLEMENTED_GP_BINOP
#undef UNIMPLEMENTED_GP_UNOP
......
......@@ -140,10 +140,12 @@ UNIMPLEMENTED_FP_BINOP(f32_add)
UNIMPLEMENTED_FP_BINOP(f32_sub)
UNIMPLEMENTED_FP_BINOP(f32_mul)
UNIMPLEMENTED_FP_UNOP(f32_neg)
UNIMPLEMENTED_FP_UNOP(f32_sqrt)
UNIMPLEMENTED_FP_BINOP(f64_add)
UNIMPLEMENTED_FP_BINOP(f64_sub)
UNIMPLEMENTED_FP_BINOP(f64_mul)
UNIMPLEMENTED_FP_UNOP(f64_neg)
UNIMPLEMENTED_FP_UNOP(f64_sqrt)
#undef UNIMPLEMENTED_GP_BINOP
#undef UNIMPLEMENTED_GP_UNOP
......
......@@ -526,6 +526,10 @@ void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
}
}
void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
Sqrtss(dst, src);
}
void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
DoubleRegister rhs) {
if (CpuFeatures::IsSupported(AVX)) {
......@@ -578,6 +582,10 @@ void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
}
}
void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
Sqrtsd(dst, src);
}
void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }
void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
......
......@@ -1040,6 +1040,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfCPURegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x51:
AppendToBuffer("vsqrtss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x58:
AppendToBuffer("vaddss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......
......@@ -208,6 +208,7 @@ class TurboAssembler : public Assembler {
AVX_OP(Cmpnlepd, cmpnlepd)
AVX_OP(Roundss, roundss)
AVX_OP(Roundsd, roundsd)
AVX_OP(Sqrtss, sqrtss)
AVX_OP(Sqrtsd, sqrtsd)
AVX_OP(Ucomiss, ucomiss)
AVX_OP(Ucomisd, ucomisd)
......
......@@ -498,6 +498,8 @@ TEST(DisasmIa320) {
__ minsd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ maxsd(xmm1, xmm0);
__ maxsd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ sqrtsd(xmm1, xmm0);
__ sqrtsd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ ucomisd(xmm0, xmm1);
__ cmpltsd(xmm0, xmm1);
......@@ -606,6 +608,8 @@ TEST(DisasmIa320) {
__ vminsd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmaxsd(xmm0, xmm1, xmm2);
__ vmaxsd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vsqrtsd(xmm0, xmm1, xmm2);
__ vsqrtsd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vaddss(xmm0, xmm1, xmm2);
__ vaddss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
......@@ -619,6 +623,8 @@ TEST(DisasmIa320) {
__ vminss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmaxss(xmm0, xmm1, xmm2);
__ vmaxss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vsqrtss(xmm0, xmm1, xmm2);
__ vsqrtss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vandps(xmm0, xmm1, xmm2);
__ vandps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
......
......@@ -433,6 +433,8 @@ TEST(DisasmX64) {
__ maxss(xmm1, Operand(rbx, rcx, times_4, 10000));
__ minss(xmm1, xmm0);
__ minss(xmm1, Operand(rbx, rcx, times_4, 10000));
__ sqrtss(xmm1, xmm0);
__ sqrtss(xmm1, Operand(rbx, rcx, times_4, 10000));
__ addps(xmm1, xmm0);
__ addps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ subps(xmm1, xmm0);
......@@ -474,6 +476,8 @@ TEST(DisasmX64) {
__ minsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ maxsd(xmm1, xmm0);
__ maxsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ sqrtsd(xmm1, xmm0);
__ sqrtsd(xmm1, Operand(rbx, rcx, times_4, 10000));
__ ucomisd(xmm0, xmm1);
__ andpd(xmm0, xmm1);
......@@ -635,6 +639,8 @@ TEST(DisasmX64) {
__ vminss(xmm9, xmm1, Operand(rbx, rcx, times_8, 10000));
__ vmaxss(xmm8, xmm1, xmm2);
__ vmaxss(xmm9, xmm1, Operand(rbx, rcx, times_1, 10000));
__ vsqrtss(xmm8, xmm1, xmm2);
__ vsqrtss(xmm9, xmm1, Operand(rbx, rcx, times_1, 10000));
__ vmovss(xmm9, Operand(r11, rcx, times_8, -10000));
__ vmovss(Operand(rbx, r9, times_4, 10000), xmm1);
__ vucomiss(xmm9, xmm1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment