Commit 08e514a8 authored by Clemens Backes, committed by V8 LUCI CQ

[codegen][x64] Improve code for float to int64

This improves the code generated for float to int64 conversions on x64.
Instead of explicitly checking the input for specific values and
executing conditional jumps, just convert the integer back to a float
and check if this results in the rounded input. The "success value" is
then materialized via vmov + and instead of via branches.

old:
   7  c4e1fb2cd9           vcvttsd2siq rbx,xmm1
   c  ba01000000           movl rdx,0x1
  11  49ba000000000000e0c3 REX.W movq r10,0xc3e0000000000000
  1b  c441f96efa           vmovq xmm15,r10
  20  c5792ef9             vucomisd xmm15,xmm1
  24  7a08                 jpe 0x3599421714ee  <+0x2e>
  26  7408                 jz 0x3599421714f0  <+0x30>
  28  4883fb01             REX.W cmpq rbx,0x1
  2c  7102                 jno 0x3599421714f0  <+0x30>
  2e  33d2                 xorl rdx,rdx

new:
   7  c463010bf90b         vroundsd xmm15,xmm15,xmm1,0xb
   d  c4e1fb2cd9           vcvttsd2siq rbx,xmm1
  12  c4e1832ac3           vcvtqsi2sd xmm0,xmm15,rbx
  17  c4c17bc2c700         vcmpss xmm0,xmm0,xmm15, (eq)
  1d  c4e1f97ec2           vmovq rdx,xmm0
  22  83e201               andl rdx,0x1

A follow-up step would be to replace the explicitly materialized success
value by a direct jump to the code handling the error case, but that
requires more rewrite in TurboFan.

R=tebbi@chromium.org

Bug: v8:10005
Change-Id: Iaedc3f395fb3a8c11c936faa8c6e55c2dfe86cd9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3560434
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79854}
parent 51b99213
......@@ -3372,6 +3372,28 @@ void Assembler::haddps(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
// Emits the legacy SSE encoding of CMPSS with the EQ predicate
// (F3 [REX] 0F C2 /r 0x00), comparing the low single-precision lanes of
// {dst} and {src}. Byte order of the emits is the instruction encoding;
// do not reorder.
void Assembler::cmpeqss(XMMRegister dst, XMMRegister src) {
// Under AVX the VEX-encoded vcmpeqss must be used instead.
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0xF3);  // Scalar single-precision prefix.
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xC2);  // CMPSS opcode.
emit_sse_operand(dst, src);
emit(0x00); // EQ == 0
}
// Emits the legacy SSE encoding of CMPSD with the EQ predicate
// (F2 [REX] 0F C2 /r 0x00), comparing the low double-precision lanes of
// {dst} and {src}. Byte order of the emits is the instruction encoding;
// do not reorder.
void Assembler::cmpeqsd(XMMRegister dst, XMMRegister src) {
// Under AVX the VEX-encoded vcmpeqsd must be used instead.
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0xF2);  // Scalar double-precision prefix.
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0xC2);  // CMPSD opcode.
emit_sse_operand(dst, src);
emit(0x00); // EQ == 0
}
void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit(0xF2);
......@@ -3389,6 +3411,13 @@ void Assembler::roundss(XMMRegister dst, XMMRegister src, RoundingMode mode) {
emit(static_cast<byte>(mode) | 0x8);
}
// Memory-operand variant of ROUNDSS (SSE4.1, 66 0F 3A 0A /r ib):
// rounds the low single-precision value loaded from {src} according
// to {mode}.
void Assembler::roundss(XMMRegister dst, Operand src, RoundingMode mode) {
// Under AVX the VEX-encoded vroundss must be used instead.
DCHECK(!IsEnabled(AVX));
sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x0A);
// Mask precision exception.
emit(static_cast<byte>(mode) | 0x8);
void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(!IsEnabled(AVX));
sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x0B);
......@@ -3396,6 +3425,13 @@ void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
emit(static_cast<byte>(mode) | 0x8);
}
// Memory-operand variant of ROUNDSD (SSE4.1, 66 0F 3A 0B /r ib):
// rounds the low double-precision value loaded from {src} according
// to {mode}.
void Assembler::roundsd(XMMRegister dst, Operand src, RoundingMode mode) {
// Under AVX the VEX-encoded vroundsd must be used instead.
DCHECK(!IsEnabled(AVX));
sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x0B);
// Mask precision exception.
emit(static_cast<byte>(mode) | 0x8);
}
void Assembler::roundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(!IsEnabled(AVX));
sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x08);
......
......@@ -1288,6 +1288,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void haddps(XMMRegister dst, XMMRegister src);
void haddps(XMMRegister dst, Operand src);
void cmpeqsd(XMMRegister dst, XMMRegister src);
void cmpeqss(XMMRegister dst, XMMRegister src);
void cmpltsd(XMMRegister dst, XMMRegister src);
void movmskpd(Register dst, XMMRegister src);
......@@ -1309,7 +1311,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void pinsrq(XMMRegister dst, Operand src, uint8_t imm8);
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundss(XMMRegister dst, Operand src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, Operand src, RoundingMode mode);
void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
......@@ -1556,11 +1560,21 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
// Memory-operand variant of the VEX-encoded ROUNDSS
// (opcode 0x0a in the 66 0F 3A map).
void vroundss(XMMRegister dst, XMMRegister src1, Operand src2,
RoundingMode mode) {
vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
// Register-operand variant of the VEX-encoded ROUNDSD
// (opcode 0x0b in the 66 0F 3A map).
void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
// Memory-operand variant of the VEX-encoded ROUNDSD
// (opcode 0x0b in the 66 0F 3A map).
void vroundsd(XMMRegister dst, XMMRegister src1, Operand src2,
RoundingMode mode) {
vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
vinstr(0x08, dst, xmm0, src, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
......@@ -1625,6 +1639,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vpd(0x50, idst, xmm0, src);
}
void vpmovmskb(Register dst, XMMRegister src);
// VEX-encoded CMPSS with the EQ predicate (imm8 = 0). Note: {dst} doubles
// as the first source operand.
void vcmpeqss(XMMRegister dst, XMMRegister src) {
vss(0xC2, dst, dst, src);
emit(0x00); // EQ == 0
}
// VEX-encoded CMPSD with the EQ predicate (imm8 = 0). Note: {dst} doubles
// as the first source operand.
void vcmpeqsd(XMMRegister dst, XMMRegister src) {
vsd(0xC2, dst, dst, src);
emit(0x00); // EQ == 0
}
void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
vps(0xC2, dst, src1, src2);
emit(cmp);
......
......@@ -1215,6 +1215,23 @@ void TurboAssembler::Cvttss2uiq(Register dst, XMMRegister src, Label* fail) {
ConvertFloatToUint64<XMMRegister, false>(this, dst, src, fail);
}
// Macro wrapper for a scalar single-precision EQ compare: dispatches to
// the VEX-encoded vcmpeqss when AVX is available, otherwise to the legacy
// SSE cmpeqss encoding.
void TurboAssembler::Cmpeqss(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cmpeqss(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcmpeqss(dst, src);
}
// Macro wrapper for a scalar double-precision EQ compare: dispatches to
// the VEX-encoded vcmpeqsd when AVX is available, otherwise to the legacy
// SSE cmpeqsd encoding.
void TurboAssembler::Cmpeqsd(XMMRegister dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cmpeqsd(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcmpeqsd(dst, src);
}
// ----------------------------------------------------------------------------
// Smi tagging, untagging and tag detection.
......
......@@ -169,6 +169,9 @@ class V8_EXPORT_PRIVATE TurboAssembler
void Cvtlsi2sd(XMMRegister dst, Register src);
void Cvtlsi2sd(XMMRegister dst, Operand src);
void Cmpeqss(XMMRegister dst, XMMRegister src);
void Cmpeqsd(XMMRegister dst, XMMRegister src);
void PextrdPreSse41(Register dst, XMMRegister src, uint8_t imm8);
void Pextrq(Register dst, XMMRegister src, int8_t imm8);
......
......@@ -2011,64 +2011,68 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kSSEFloat32ToInt64:
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
} else {
__ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
}
if (instr->OutputCount() > 1) {
__ Move(i.OutputRegister(1), 1);
Label done;
Label fail;
__ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
case kSSEFloat32ToInt64: {
Register output_reg = i.OutputRegister(0);
if (instr->OutputCount() == 1) {
if (instr->InputAt(0)->IsFPRegister()) {
__ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
__ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
} else {
__ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
__ Cvttss2siq(output_reg, i.InputOperand(0));
}
// If the input is NaN, then the conversion fails.
__ j(parity_even, &fail, Label::kNear);
// If the input is INT64_MIN, then the conversion succeeds.
__ j(equal, &done, Label::kNear);
__ cmpq(i.OutputRegister(0), Immediate(1));
// If the conversion results in INT64_MIN, but the input was not
// INT64_MIN, then the conversion fails.
__ j(no_overflow, &done, Label::kNear);
__ bind(&fail);
__ Move(i.OutputRegister(1), 0);
__ bind(&done);
}
break;
case kSSEFloat64ToInt64:
break;
}
DCHECK_EQ(2, instr->OutputCount());
Register success_reg = i.OutputRegister(1);
DoubleRegister rounded = kScratchDoubleReg;
if (instr->InputAt(0)->IsFPRegister()) {
__ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
__ Roundss(rounded, i.InputDoubleRegister(0), kRoundToZero);
__ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
} else {
__ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
}
if (instr->OutputCount() > 1) {
__ Move(i.OutputRegister(1), 1);
Label done;
Label fail;
__ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
__ Roundss(rounded, i.InputOperand(0), kRoundToZero);
// Convert {rounded} instead of the input operand, to avoid another
// load.
__ Cvttss2siq(output_reg, rounded);
}
DoubleRegister converted_back = i.TempSimd128Register(0);
__ Cvtqsi2ss(converted_back, output_reg);
// Compare the converted back value to the rounded value, set success_reg
// to 0 if they differ, or 1 on success.
__ Cmpeqss(converted_back, rounded);
__ Movq(success_reg, converted_back);
__ And(success_reg, Immediate(1));
break;
}
case kSSEFloat64ToInt64: {
Register output_reg = i.OutputRegister(0);
if (instr->OutputCount() == 1) {
if (instr->InputAt(0)->IsFPRegister()) {
__ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
__ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
} else {
__ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
__ Cvttsd2siq(output_reg, i.InputOperand(0));
}
// If the input is NaN, then the conversion fails.
__ j(parity_even, &fail, Label::kNear);
// If the input is INT64_MIN, then the conversion succeeds.
__ j(equal, &done, Label::kNear);
__ cmpq(i.OutputRegister(0), Immediate(1));
// If the conversion results in INT64_MIN, but the input was not
// INT64_MIN, then the conversion fails.
__ j(no_overflow, &done, Label::kNear);
__ bind(&fail);
__ Move(i.OutputRegister(1), 0);
__ bind(&done);
break;
}
DCHECK_EQ(2, instr->OutputCount());
Register success_reg = i.OutputRegister(1);
DoubleRegister rounded = kScratchDoubleReg;
if (instr->InputAt(0)->IsFPRegister()) {
__ Roundsd(rounded, i.InputDoubleRegister(0), kRoundToZero);
__ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
} else {
__ Roundsd(rounded, i.InputOperand(0), kRoundToZero);
// Convert {rounded} instead of the input operand, to avoid another
// load.
__ Cvttsd2siq(output_reg, rounded);
}
DoubleRegister converted_back = i.TempSimd128Register(0);
__ Cvtqsi2sd(converted_back, output_reg);
// Compare the converted back value to the rounded value, set success_reg
// to 0 if they differ, or 1 on success.
__ Cmpeqsd(converted_back, rounded);
__ Movq(success_reg, converted_back);
__ And(success_reg, Immediate(1));
break;
}
case kSSEFloat32ToUint64: {
Label fail;
if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
......
......@@ -1445,30 +1445,36 @@ void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
X64OperandGenerator g(this);
InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
InstructionOperand outputs[2];
InstructionOperand temps[1];
size_t output_count = 0;
size_t temp_count = 0;
outputs[output_count++] = g.DefineAsRegister(node);
Node* success_output = NodeProperties::FindProjection(node, 1);
if (success_output) {
outputs[output_count++] = g.DefineAsRegister(success_output);
temps[temp_count++] = g.TempSimd128Register();
}
Emit(kSSEFloat32ToInt64, output_count, outputs, 1, inputs);
Emit(kSSEFloat32ToInt64, output_count, outputs, 1, inputs, temp_count, temps);
}
void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
X64OperandGenerator g(this);
InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
InstructionOperand outputs[2];
InstructionOperand temps[1];
size_t output_count = 0;
size_t temp_count = 0;
outputs[output_count++] = g.DefineAsRegister(node);
Node* success_output = NodeProperties::FindProjection(node, 1);
if (success_output) {
outputs[output_count++] = g.DefineAsRegister(success_output);
temps[temp_count++] = g.TempSimd128Register();
}
Emit(kSSEFloat64ToInt64, output_count, outputs, 1, inputs);
Emit(kSSEFloat64ToInt64, output_count, outputs, 1, inputs, temp_count, temps);
}
void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
......
......@@ -1158,6 +1158,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
AppendToBuffer("vcvtdq2pd %s,", NameOfAVXRegister(regop));
current += PrintRightAVXOperand(current);
break;
case 0xC2:
AppendToBuffer("vcmpss %s,%s,", NameOfAVXRegister(regop),
NameOfAVXRegister(vvvv));
current += PrintRightAVXOperand(current);
AppendToBuffer(", (%s)", cmp_pseudo_op[*current]);
current += 1;
break;
default:
UnimplementedInstruction();
}
......@@ -1213,6 +1220,13 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfAVXRegister(vvvv));
current += PrintRightAVXOperand(current);
break;
case 0xC2:
AppendToBuffer("vcmpsd %s,%s,", NameOfAVXRegister(regop),
NameOfAVXRegister(vvvv));
current += PrintRightAVXOperand(current);
AppendToBuffer(", (%s)", cmp_pseudo_op[*current]);
current += 1;
break;
#define DISASM_SSE2_INSTRUCTION_LIST_SD(instruction, _1, _2, opcode) \
case 0x##opcode: \
AppendToBuffer("v" #instruction " %s,%s,", NameOfAVXRegister(regop), \
......@@ -2296,6 +2310,8 @@ const char* DisassemblerX64::TwoByteMnemonic(byte opcode) {
return "movsxb";
case 0xBF:
return "movsxw";
case 0xC2:
return "cmpss";
default:
return nullptr;
}
......
......@@ -780,6 +780,8 @@ UNINITIALIZED_TEST(DisasmX64CheckOutputSSE) {
COMPARE("440f178c8b10270000 movhps [rbx+rcx*4+0x2710],xmm9",
movhps(Operand(rbx, rcx, times_4, 10000), xmm9));
COMPARE("410fc6c100 shufps xmm0, xmm9, 0", shufps(xmm0, xmm9, 0x0));
COMPARE("f30fc2c100 cmpeqss xmm0,xmm1", cmpeqss(xmm0, xmm1));
COMPARE("f20fc2c100 cmpeqsd xmm0,xmm1", cmpeqsd(xmm0, xmm1));
COMPARE("0f2ec1 ucomiss xmm0,xmm1", ucomiss(xmm0, xmm1));
COMPARE("0f2e848b10270000 ucomiss xmm0,[rbx+rcx*4+0x2710]",
ucomiss(xmm0, Operand(rbx, rcx, times_4, 10000)));
......@@ -1027,8 +1029,12 @@ UNINITIALIZED_TEST(DisasmX64CheckOutputSSE4_1) {
roundpd(xmm8, xmm3, kRoundToNearest));
COMPARE("66440f3a0ac309 roundss xmm8,xmm3,0x1",
roundss(xmm8, xmm3, kRoundDown));
COMPARE("66440f3a0a420b09 roundss xmm8,[rdx+0xb],0x1",
roundss(xmm8, Operand(rdx, 11), kRoundDown));
COMPARE("66440f3a0bc309 roundsd xmm8,xmm3,0x1",
roundsd(xmm8, xmm3, kRoundDown));
COMPARE("66440f3a0b420b09 roundsd xmm8,[rdx+0xb],0x1",
roundsd(xmm8, Operand(rdx, 11), kRoundDown));
#define COMPARE_SSE4_1_INSTR(instruction, _, __, ___, ____) \
exp = #instruction " xmm5,xmm1"; \
......@@ -1167,6 +1173,10 @@ UNINITIALIZED_TEST(DisasmX64CheckOutputAVX) {
vmovss(xmm9, Operand(r11, rcx, times_8, -10000)));
COMPARE("c4a17a118c8b10270000 vmovss [rbx+r9*4+0x2710],xmm1",
vmovss(Operand(rbx, r9, times_4, 10000), xmm1));
COMPARE("c532c2c900 vcmpss xmm9,xmm9,xmm1, (eq)",
vcmpeqss(xmm9, xmm1));
COMPARE("c533c2c900 vcmpsd xmm9,xmm9,xmm1, (eq)",
vcmpeqsd(xmm9, xmm1));
COMPARE("c5782ec9 vucomiss xmm9,xmm1", vucomiss(xmm9, xmm1));
COMPARE("c5782e8453e52a0000 vucomiss xmm8,[rbx+rdx*2+0x2ae5]",
vucomiss(xmm8, Operand(rbx, rdx, times_2, 10981)));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment