Commit fa5d09e5, authored by ahaas, committed by Commit bot

[turbofan] Implement rounding of floats on x64 and ia32 without sse4.1.

The implementation sets the rounding mode in the MXCSR register and then
uses the cvtsd2si and cvtsi2sd instructions (convert between float and int)
to do the rounding. Input values outside the int range either do not have to
be rounded at all, or are rounded by calculating input + 2^52 - 2^52 for
positive inputs, or input - 2^52 + 2^52 for negative inputs. The original
rounding mode is restored afterwards.

R=titzer@chromium.org

B=575379

Review URL: https://codereview.chromium.org/1584663007

Cr-Commit-Position: refs/heads/master@{#33367}
parent b577ecfd
......@@ -654,10 +654,16 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
break;
}
case kSSEFloat32Round: {
CpuFeatureScope sse_scope(masm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(masm(), SSE4_1);
__ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
} else {
Register scratch = i.TempRegister(0);
__ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), scratch,
mode);
}
break;
}
case kSSEFloat64Cmp:
......@@ -727,10 +733,16 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat64Round: {
CpuFeatureScope sse_scope(masm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(masm(), SSE4_1);
__ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
} else {
Register scratch = i.TempRegister(0);
__ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), scratch,
kScratchDoubleReg, mode);
}
break;
}
case kSSEFloat32ToFloat64:
......
......@@ -871,32 +871,74 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
// Selects kSSEFloat32Round with kRoundDown encoded in MiscField (ia32).
void InstructionSelector::VisitFloat32RoundDown(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundDown));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundDown));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat32Round | MiscField::encode(kRoundDown),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
// Selects kSSEFloat64Round with kRoundDown encoded in MiscField (ia32).
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundDown));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundDown));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat64Round | MiscField::encode(kRoundDown),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
// Selects kSSEFloat32Round with kRoundUp encoded in MiscField (ia32).
void InstructionSelector::VisitFloat32RoundUp(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundUp));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundUp));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat32Round | MiscField::encode(kRoundUp),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
// Selects kSSEFloat64Round with kRoundUp encoded in MiscField (ia32).
void InstructionSelector::VisitFloat64RoundUp(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundUp));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundUp));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat64Round | MiscField::encode(kRoundUp),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
// Selects kSSEFloat32Round with kRoundToZero encoded in MiscField (ia32).
void InstructionSelector::VisitFloat32RoundTruncate(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundToZero));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundToZero));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat32Round | MiscField::encode(kRoundToZero),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
// Selects kSSEFloat64Round with kRoundToZero encoded in MiscField (ia32).
void InstructionSelector::VisitFloat64RoundTruncate(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundToZero));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundToZero));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat64Round | MiscField::encode(kRoundToZero),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
......@@ -906,12 +948,26 @@ void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
// Selects kSSEFloat32Round with kRoundToNearest encoded in MiscField (ia32).
void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundToNearest));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundToNearest));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat32Round | MiscField::encode(kRoundToNearest),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
// Selects kSSEFloat64Round with kRoundToNearest encoded in MiscField (ia32).
void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) {
// NOTE(review): this unconditional VisitRR repeats the SSE4.1 branch below; it
// looks like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundToNearest));
if (CpuFeatures::IsSupported(SSE4_1)) {
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundToNearest));
} else {
// Without SSE4.1 the output is defined same-as-first and one temp register
// is attached to the instruction.
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEFloat64Round | MiscField::encode(kRoundToNearest),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), 1, temps);
}
}
......@@ -1323,20 +1379,19 @@ InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::kFloat64Max |
MachineOperatorBuilder::kFloat64Min |
MachineOperatorBuilder::kWord32ShiftIsSafe |
MachineOperatorBuilder::kWord32Ctz;
MachineOperatorBuilder::kWord32Ctz |
MachineOperatorBuilder::kFloat32RoundDown |
MachineOperatorBuilder::kFloat32RoundUp |
MachineOperatorBuilder::kFloat32RoundTruncate |
MachineOperatorBuilder::kFloat32RoundTiesEven |
MachineOperatorBuilder::kFloat64RoundDown |
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTiesEven;
if (CpuFeatures::IsSupported(POPCNT)) {
flags |= MachineOperatorBuilder::kWord32Popcnt;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
flags |= MachineOperatorBuilder::kFloat32RoundDown |
MachineOperatorBuilder::kFloat64RoundDown |
MachineOperatorBuilder::kFloat32RoundUp |
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat32RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat32RoundTiesEven |
MachineOperatorBuilder::kFloat64RoundTiesEven;
}
return flags;
}
......
......@@ -941,10 +941,10 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
ASSEMBLE_SSE_UNOP(Cvtss2sd);
break;
case kSSEFloat32Round: {
CpuFeatureScope sse_scope(masm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
__ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
kScratchRegister, mode);
break;
}
case kSSEFloat32ToInt32:
......@@ -1028,10 +1028,10 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
ASSEMBLE_SSE_UNOP(sqrtsd);
break;
case kSSEFloat64Round: {
CpuFeatureScope sse_scope(masm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
__ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
kScratchRegister, mode);
break;
}
case kSSEFloat64ToFloat32:
......
......@@ -1226,32 +1226,44 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
// Selects kSSEFloat32Round (kRoundDown) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat32RoundDown(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundDown));
X64OperandGenerator g(this);
Emit(kSSEFloat32Round | MiscField::encode(kRoundDown),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
// Selects kSSEFloat64Round (kRoundDown) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundDown));
X64OperandGenerator g(this);
Emit(kSSEFloat64Round | MiscField::encode(kRoundDown),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
// Selects kSSEFloat32Round (kRoundUp) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat32RoundUp(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundUp));
X64OperandGenerator g(this);
Emit(kSSEFloat32Round | MiscField::encode(kRoundUp),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
// Selects kSSEFloat64Round (kRoundUp) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat64RoundUp(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundUp));
X64OperandGenerator g(this);
Emit(kSSEFloat64Round | MiscField::encode(kRoundUp),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
// Selects kSSEFloat32Round (kRoundToZero) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat32RoundTruncate(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundToZero));
X64OperandGenerator g(this);
Emit(kSSEFloat32Round | MiscField::encode(kRoundToZero),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
// Selects kSSEFloat64Round (kRoundToZero) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat64RoundTruncate(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundToZero));
X64OperandGenerator g(this);
Emit(kSSEFloat64Round | MiscField::encode(kRoundToZero),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
......@@ -1261,12 +1273,16 @@ void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
// Selects kSSEFloat32Round (kRoundToNearest) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundToNearest));
X64OperandGenerator g(this);
Emit(kSSEFloat32Round | MiscField::encode(kRoundToNearest),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
// Selects kSSEFloat64Round (kRoundToNearest) on x64 with the output defined
// same-as-first.
void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) {
// NOTE(review): this VisitRR precedes an Emit of the same opcode; it looks
// like the pre-change line kept by the diff view - confirm.
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundToNearest));
X64OperandGenerator g(this);
Emit(kSSEFloat64Round | MiscField::encode(kRoundToNearest),
g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
}
......@@ -1799,21 +1815,19 @@ InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::kFloat64Max |
MachineOperatorBuilder::kFloat64Min |
MachineOperatorBuilder::kWord32ShiftIsSafe |
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord64Ctz;
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord64Ctz |
MachineOperatorBuilder::kFloat32RoundDown |
MachineOperatorBuilder::kFloat32RoundUp |
MachineOperatorBuilder::kFloat32RoundTruncate |
MachineOperatorBuilder::kFloat32RoundTiesEven |
MachineOperatorBuilder::kFloat64RoundDown |
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTiesEven;
if (CpuFeatures::IsSupported(POPCNT)) {
flags |= MachineOperatorBuilder::kWord32Popcnt |
MachineOperatorBuilder::kWord64Popcnt;
}
if (CpuFeatures::IsSupported(SSE4_1)) {
flags |= MachineOperatorBuilder::kFloat32RoundDown |
MachineOperatorBuilder::kFloat64RoundDown |
MachineOperatorBuilder::kFloat32RoundUp |
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat32RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat32RoundTiesEven |
MachineOperatorBuilder::kFloat64RoundTiesEven;
}
return flags;
}
......
......@@ -1982,6 +1982,15 @@ void Assembler::cvttss2si(Register dst, const Operand& src) {
}
// Emits cvtss2si: the bytes F3 0F 2D followed by the encoded operand |src|
// with |dst| placed in the ModRM reg field.
void Assembler::cvtss2si(Register dst, const Operand& src) {
EnsureSpace ensure_space(this);
EMIT(0xF3);
EMIT(0x0F);
EMIT(0x2D);
emit_operand(dst, src);
}
void Assembler::cvttsd2si(Register dst, const Operand& src) {
EnsureSpace ensure_space(this);
EMIT(0xF2);
......@@ -1991,6 +2000,15 @@ void Assembler::cvttsd2si(Register dst, const Operand& src) {
}
// Emits cvtsd2si: the bytes F2 0F 2D followed by the encoded operand |src|
// with |dst| placed in the ModRM reg field.
void Assembler::cvtsd2si(Register dst, const Operand& src) {
EnsureSpace ensure_space(this);
EMIT(0xF2);
EMIT(0x0F);
EMIT(0x2D);
emit_operand(dst, src);
}
void Assembler::cvtsd2si(Register dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0xF2);
......@@ -2199,6 +2217,22 @@ void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
}
// Emits ldmxcsr: the bytes 0F AE followed by the operand |dst| with the
// opcode extension 2 in the ModRM reg field.
void Assembler::ldmxcsr(const Operand& dst) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0xAE);
// 2 is the opcode extension, not a register code.
emit_operand(2, dst);
}
// Emits stmxcsr: the bytes 0F AE followed by the operand |dst| with the
// opcode extension 3 in the ModRM reg field.
void Assembler::stmxcsr(const Operand& dst) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0xAE);
// 3 is the opcode extension, not a register code.
emit_operand(3, dst);
}
void Assembler::movmskpd(Register dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);
......@@ -2849,11 +2883,17 @@ void Assembler::emit_arith(int sel, Operand dst, const Immediate& x) {
// Register overload: forwards the register's numeric code to the
// emit_operand(int, const Operand&) overload.
void Assembler::emit_operand(Register reg, const Operand& adr) {
  const int reg_code = reg.code();
  emit_operand(reg_code, adr);
}
void Assembler::emit_operand(int code, const Operand& adr) {
DCHECK(is_uint3(code));
const unsigned length = adr.len_;
DCHECK(length > 0);
// Emit updated ModRM byte containing the given register.
pc_[0] = (adr.buf_[0] & ~0x38) | (reg.code() << 3);
pc_[0] = (adr.buf_[0] & ~0x38) | (code << 3);
// Emit the rest of the encoded operand.
for (unsigned i = 1; i < length; i++) pc_[i] = adr.buf_[i];
......
......@@ -954,10 +954,13 @@ class Assembler : public AssemblerBase {
// XMM-source convenience overload: wraps |src| in an Operand and forwards to
// the Operand form of cvttss2si.
void cvttss2si(Register dst, XMMRegister src) {
  const Operand src_operand(src);
  cvttss2si(dst, src_operand);
}
void cvtss2si(Register dst, const Operand& src);
// XMM-source convenience overload: wraps |src| in an Operand and forwards to
// the Operand form of cvtss2si.
void cvtss2si(Register dst, XMMRegister src) {
  const Operand src_operand(src);
  cvtss2si(dst, src_operand);
}
void cvttsd2si(Register dst, const Operand& src);
// XMM-source convenience overload: wraps |src| in an Operand and forwards to
// the Operand form of cvttsd2si.
void cvttsd2si(Register dst, XMMRegister src) {
  const Operand src_operand(src);
  cvttsd2si(dst, src_operand);
}
void cvtsd2si(Register dst, const Operand& src);
void cvtsd2si(Register dst, XMMRegister src);
// Register-source convenience overload: wraps |src| in an Operand and
// forwards to the Operand form of cvtsi2ss.
void cvtsi2ss(XMMRegister dst, Register src) {
  const Operand src_operand(src);
  cvtsi2ss(dst, src_operand);
}
......@@ -993,6 +996,9 @@ class Assembler : public AssemblerBase {
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void ldmxcsr(const Operand& dst);
void stmxcsr(const Operand& dst);
void movmskpd(Register dst, XMMRegister src);
void movmskps(Register dst, XMMRegister src);
......@@ -1495,6 +1501,7 @@ class Assembler : public AssemblerBase {
void emit_arith(int sel, Operand dst, const Immediate& x);
void emit_operand(Register reg, const Operand& adr);
void emit_operand(int code, const Operand& adr);
void emit_label(Label* label);
......
......@@ -1484,6 +1484,29 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
} else {
AppendToBuffer(",%s,cl", NameOfCPURegister(regop));
}
} else if (f0byte == 0xAE) {
// ldmxcsr and stmxcsr
data += 2;
byte modrm = *data;
int mod, regop, rm;
get_modrm(modrm, &mod, &regop, &rm);
regop &= 0x7; // The REX.R bit does not affect the operation.
const char* mnem = NULL;
switch (regop) {
case 2:
mnem = "ldmxcsr";
break;
case 3:
mnem = "stmxcsr";
break;
default:
UnimplementedInstruction();
return 2;
}
DCHECK_NOT_NULL(mnem);
AppendToBuffer("%s ", mnem);
data += PrintRightOperandHelper(
data, &DisassemblerIA32::NameOfCPURegister);
} else if (f0byte == 0xBC) {
data += 2;
int mod, regop, rm;
......@@ -1630,7 +1653,15 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
}
} else if (*data == 0x3A) {
data++;
if (*data == 0x0B) {
if (*data == 0x0A) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("roundss %s,%s,%d", NameOfXMMRegister(regop),
NameOfXMMRegister(rm), static_cast<int>(imm8));
data += 2;
} else if (*data == 0x0B) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
......
......@@ -581,12 +581,137 @@ void MacroAssembler::DebugBreak() {
}
// Converts the integer operand |src| to float32 in |dst|. |dst| is zeroed
// with xorps before the cvtsi2ss is emitted.
void MacroAssembler::Cvtsi2ss(XMMRegister dst, const Operand& src) {
xorps(dst, dst);
cvtsi2ss(dst, src);
}
// Converts the integer operand |src| to float64 in |dst|. |dst| is zeroed
// with xorps before the cvtsi2sd is emitted.
void MacroAssembler::Cvtsi2sd(XMMRegister dst, const Operand& src) {
xorps(dst, dst);
cvtsi2sd(dst, src);
}
// Rounds the float32 in |src| to an integral value using rounding mode |mode|
// and leaves the result in |dst|. |tmp| is clobbered.
//
// With SSE4.1 this is a single roundss. Otherwise the rounding mode is set
// temporarily in MXCSR and the value is rounded by converting it to int32
// and back. Two stack slots are reserved for the duration: [esp] holds the
// original MXCSR value, [esp + kPointerSize] the modified one.
//
// NOTE(review): the int32 round trip cannot carry the sign of zero, so an
// input that rounds to -0.0 presumably comes back as +0.0 - confirm whether
// callers depend on the sign.
void MacroAssembler::Roundss(XMMRegister dst, XMMRegister src, Register tmp,
                             RoundingMode mode) {
  if (CpuFeatures::IsSupported(SSE4_1)) {
    CpuFeatureScope sse_scope(this, SSE4_1);
    roundss(dst, src, mode);
  } else {
    // We have to store the original rounding mode to restore it later.
    {
      sub(esp, Immediate(kPointerSize * 2));
      stmxcsr(Operand(esp, 0));
      mov(tmp, Operand(esp, 0));
      // Clear bits 13-14 and insert |mode| there.
      and_(tmp, Immediate(0xffff9fff));
      or_(tmp, Immediate(mode << 13));
      mov(Operand(esp, kPointerSize), tmp);
      ldmxcsr(Operand(esp, kPointerSize));
    }
    // Do rounding by conversion to int.
    cvtss2si(tmp, src);
    Label out_of_range;
    Label done;
    // Check whether the input is within int32 range. "tmp - 1" only
    // overflows when tmp is INT_MIN. If the conversion results in INT_MIN,
    // then the input is outside int range, and due to the limited precision
    // of float32 this means that the input must have been an integer already.
    // We are therefore done already.
    cmp(tmp, Immediate(1));
    j(overflow, &out_of_range);
    // The input is within int32 range; converting back yields the rounded
    // value. Use the Cvtsi2ss wrapper (clears dst first), matching Roundsd's
    // use of Cvtsi2sd. tmp already holds the converted value, so clearing dst
    // is safe even when dst aliases src.
    Cvtsi2ss(dst, tmp);
    if (!dst.is(src)) {
      // Skip the copy below. When dst aliases src no copy is emitted, so
      // falling through is equivalent.
      jmp(&done);
    }
    bind(&out_of_range);
    if (!dst.is(src)) {
      movss(dst, src);
    }
    bind(&done);
    // Restore the original rounding mode.
    ldmxcsr(Operand(esp, 0));
    add(esp, Immediate(kPointerSize * 2));
  }
}
// Rounds the float64 in |src| to an integral value using rounding mode |mode|
// and leaves the result in |dst|. |tmp| and |xtmp| are clobbered on the
// non-SSE4.1 path.
//
// With SSE4.1 this is a single roundsd. Otherwise the rounding mode is set
// temporarily in MXCSR. Inputs that fit into int32 are rounded by converting
// to int32 and back; other inputs with magnitude below 2^52 are rounded by
// adding and subtracting 2^52; everything else is returned unchanged.
//
// NOTE(review): the int32 round trip cannot carry the sign of zero, so an
// input that rounds to -0.0 presumably comes back as +0.0 - confirm whether
// callers depend on the sign.
void MacroAssembler::Roundsd(XMMRegister dst, XMMRegister src, Register tmp,
XMMRegister xtmp, RoundingMode mode) {
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
roundsd(dst, src, mode);
} else {
// We have to store the original rounding mode to restore it later.
// [esp] keeps the original MXCSR value, [esp + kPointerSize] the modified one.
{
sub(esp, Immediate(kPointerSize * 2));
stmxcsr(Operand(esp, 0));
mov(tmp, Operand(esp, 0));
// Clear bits 13-14 and insert |mode| there.
and_(tmp, Immediate(0xffff9fff));
or_(tmp, Immediate(mode << 13));
mov(Operand(esp, kPointerSize), tmp);
ldmxcsr(Operand(esp, kPointerSize));
}
// Convert the input to int32.
cvtsd2si(tmp, src);
Label out_of_range;
Label done;
// Check whether the input is within int32 range.
// "tmp - 1" only overflows when tmp is INT_MIN.
cmp(tmp, Immediate(1));
j(overflow, &out_of_range);
// The input is within int32 range. We achieve rounding by converting
// back to float.
Cvtsi2sd(dst, tmp);
jmp(&done);
bind(&out_of_range);
if (!dst.is(src)) {
movsd(dst, src);
}
// If the input is outside [-2^52, 2^52], then the result = input.
int64_t offset = 1;
offset <<= 52;
Move(xtmp, static_cast<double>(offset));
ucomisd(xtmp, src);
j(below_equal, &done);
Move(xtmp, static_cast<double>(-offset));
ucomisd(xtmp, src);
j(above_equal, &done);
// Positive number have to be handled differently than negative numbers.
xorpd(xtmp, xtmp);
ucomisd(xtmp, src);
// NOTE(review): the Move below sits between the compare and the branch that
// consumes its flags; this assumes Move(XMMRegister, double) leaves EFLAGS
// untouched - confirm.
Move(xtmp, static_cast<double>(offset));
Label below_zero;
j(above, &below_zero);
// src > 0: (src + 2^52) - 2^52.
addsd(dst, xtmp);
subsd(dst, xtmp);
jmp(&done);
bind(&below_zero);
// src < 0: (src - 2^52) + 2^52.
subsd(dst, xtmp);
addsd(dst, xtmp);
bind(&done);
// Restore the original rounding mode.
ldmxcsr(Operand(esp, 0));
add(esp, Immediate(kPointerSize * 2));
}
}
bool MacroAssembler::IsUnsafeImmediate(const Immediate& x) {
static const int kMaxImmediateBits = 17;
if (!RelocInfo::IsNone(x.rmode_)) return false;
......
......@@ -344,6 +344,13 @@ class MacroAssembler: public Assembler {
// xorps to clear the dst register before cvtsi2sd to solve this issue.
// Register-source convenience overload: wraps |src| in an Operand and
// forwards to the Operand form of Cvtsi2sd.
void Cvtsi2sd(XMMRegister dst, Register src) {
  const Operand src_operand(src);
  Cvtsi2sd(dst, src_operand);
}
void Cvtsi2sd(XMMRegister dst, const Operand& src);
// Register-source convenience overload: wraps |src| in an Operand and
// forwards to the Operand form of Cvtsi2ss.
void Cvtsi2ss(XMMRegister dst, Register src) {
  const Operand src_operand(src);
  Cvtsi2ss(dst, src_operand);
}
void Cvtsi2ss(XMMRegister dst, const Operand& src);
void Roundss(XMMRegister dst, XMMRegister src, Register tmp,
RoundingMode mode);
void Roundsd(XMMRegister dst, XMMRegister src, Register tmp, XMMRegister xtmp,
RoundingMode mode);
// Support for constant splitting.
bool IsUnsafeImmediate(const Immediate& x);
......
......@@ -3125,6 +3125,28 @@ void Assembler::cvttss2siq(Register dst, XMMRegister src) {
// As shown, emits F3 <REX.W> 0F 2D followed by the SSE operand.
// NOTE(review): 0x2D is the opcode used for the non-truncating cvtss2si
// elsewhere in this change, while the truncating forms use 0x2c. In this diff
// view the signature is probably context for a body that continues after the
// hunk, and the lines below belong to the newly added cvtss2siq - confirm
// against the real file before relying on this block.
void Assembler::cvttss2siq(Register dst, const Operand& src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0x2D);
emit_sse_operand(dst, src);
}
// Emits the 64-bit form of cvtss2si with an XMM source: F3, a REX prefix
// from emit_rex_64, 0F 2D, then the SSE operand. Must not be used while AVX
// is enabled (DCHECK).
void Assembler::cvtss2siq(Register dst, XMMRegister src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0xF3);
emit_rex_64(dst, src);
emit(0x0F);
emit(0x2D);
emit_sse_operand(dst, src);
}
void Assembler::cvtss2siq(Register dst, const Operand& src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0xF3);
......@@ -3310,6 +3332,17 @@ void Assembler::cvtsd2siq(Register dst, XMMRegister src) {
}
// Emits the 64-bit form of cvtsd2si with a memory/operand source: F2, a REX
// prefix from emit_rex_64, 0F 2D, then the SSE operand. Must not be used
// while AVX is enabled (DCHECK).
void Assembler::cvtsd2siq(Register dst, const Operand& src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0xF2);
emit_rex_64(dst, src);
emit(0x0F);
emit(0x2D);
emit_sse_operand(dst, src);
}
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit(0xF2);
......@@ -3546,6 +3579,22 @@ void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
}
// Emits ldmxcsr: the bytes 0F AE followed by the operand |dst| with the
// opcode extension 2 passed to emit_operand.
void Assembler::ldmxcsr(const Operand& dst) {
EnsureSpace ensure_space(this);
emit(0x0F);
emit(0xAE);
// 2 is the opcode extension, not a register code.
emit_operand(2, dst);
}
// Emits stmxcsr: the bytes 0F AE followed by the operand |dst| with the
// opcode extension 3 passed to emit_operand.
void Assembler::stmxcsr(const Operand& dst) {
EnsureSpace ensure_space(this);
emit(0x0F);
emit(0xAE);
// 3 is the opcode extension, not a register code.
emit_operand(3, dst);
}
void Assembler::movmskpd(Register dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit(0x66);
......
......@@ -1080,6 +1080,8 @@ class Assembler : public AssemblerBase {
void cvttsd2si(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
void cvttss2siq(Register dst, const Operand& src);
void cvtss2siq(Register dst, XMMRegister src);
void cvtss2siq(Register dst, const Operand& src);
void cvttsd2siq(Register dst, XMMRegister src);
void cvttsd2siq(Register dst, const Operand& src);
......@@ -1100,6 +1102,7 @@ class Assembler : public AssemblerBase {
void cvtsd2si(Register dst, XMMRegister src);
void cvtsd2siq(Register dst, XMMRegister src);
void cvtsd2siq(Register dst, const Operand& src);
void addsd(XMMRegister dst, XMMRegister src);
void addsd(XMMRegister dst, const Operand& src);
......@@ -1142,6 +1145,9 @@ class Assembler : public AssemblerBase {
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void ldmxcsr(const Operand& dst);
void stmxcsr(const Operand& dst);
// AVX instruction
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x99, dst, src1, src2);
......@@ -1416,6 +1422,14 @@ class Assembler : public AssemblerBase {
XMMRegister idst = {dst.code()};
vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
}
// AVX form of cvtss2si with W1 (opcode 0x2d, kF3 prefix), XMM source.
// vsd takes XMM registers, so the general-purpose |dst| is passed through an
// XMMRegister that carries the same register code.
void vcvtss2siq(Register dst, XMMRegister src) {
  XMMRegister dst_as_xmm = {dst.code()};
  vsd(0x2d, dst_as_xmm, xmm0, src, kF3, k0F, kW1);
}
// AVX form of cvtss2si with W1 (opcode 0x2d, kF3 prefix), operand source.
// vsd takes XMM registers, so the general-purpose |dst| is passed through an
// XMMRegister that carries the same register code.
void vcvtss2siq(Register dst, const Operand& src) {
  XMMRegister dst_as_xmm = {dst.code()};
  vsd(0x2d, dst_as_xmm, xmm0, src, kF3, k0F, kW1);
}
void vcvttsd2siq(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
......@@ -1424,6 +1438,14 @@ class Assembler : public AssemblerBase {
XMMRegister idst = {dst.code()};
vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
}
// AVX form of cvtsd2si with W1 (opcode 0x2d, kF2 prefix), XMM source.
// vsd takes XMM registers, so the general-purpose |dst| is passed through an
// XMMRegister that carries the same register code.
void vcvtsd2siq(Register dst, XMMRegister src) {
  XMMRegister dst_as_xmm = {dst.code()};
  vsd(0x2d, dst_as_xmm, xmm0, src, kF2, k0F, kW1);
}
// AVX form of cvtsd2si with W1 (opcode 0x2d, kF2 prefix), operand source.
// vsd takes XMM registers, so the general-purpose |dst| is passed through an
// XMMRegister that carries the same register code.
void vcvtsd2siq(Register dst, const Operand& src) {
  XMMRegister dst_as_xmm = {dst.code()};
  vsd(0x2d, dst_as_xmm, xmm0, src, kF2, k0F, kW1);
}
void vcvtsd2si(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
vsd(0x2d, idst, xmm0, src, kF2, k0F, kW0);
......
......@@ -994,6 +994,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfCPURegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x2d:
AppendToBuffer("vcvtss2si%s %s,", vex_w() ? "q" : "",
NameOfCPURegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x58:
AppendToBuffer("vaddss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -1711,6 +1716,14 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("cvttss2si%c %s,",
operand_size_code(), NameOfCPURegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x2D) {
// CVTSS2SI:
// Convert with rounded scalar single-precision FP to dword integer.
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("cvtss2si%c %s,", operand_size_code(),
NameOfCPURegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x7E) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
......@@ -1871,6 +1884,27 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
current += PrintRightOperand(current);
} else if (opcode == 0x0B) {
AppendToBuffer("ud2");
} else if (opcode == 0xAE) {
byte modrm = *(data + 2);
int mod, regop, rm;
get_modrm(modrm, &mod, &regop, &rm);
regop &= 0x7; // The REX.R bit does not affect the operation.
const char* mnem = NULL;
switch (regop) {
case 2:
mnem = "ldmxcsr";
break;
case 3:
mnem = "stmxcsr";
break;
default:
UnimplementedInstruction();
return 2;
}
DCHECK_NOT_NULL(mnem);
AppendToBuffer("%s ", mnem);
current +=
PrintRightOperandHelper(current, &DisassemblerX64::NameOfCPURegister);
} else {
UnimplementedInstruction();
}
......
......@@ -1002,6 +1002,46 @@ void MacroAssembler::Cvttss2siq(Register dst, const Operand& src) {
}
// Converts the float32 in |src| to a 64-bit integer in |dst|, picking the
// AVX encoding when AVX is supported and the SSE encoding otherwise.
void MacroAssembler::Cvtss2siq(Register dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cvtss2siq(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcvtss2siq(dst, src);
}
// Converts the float32 at operand |src| to a 64-bit integer in |dst|, picking
// the AVX encoding when AVX is supported and the SSE encoding otherwise.
void MacroAssembler::Cvtss2siq(Register dst, const Operand& src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cvtss2siq(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcvtss2siq(dst, src);
}
// Converts the float64 in |src| to a 64-bit integer in |dst|, picking the
// AVX encoding when AVX is supported and the SSE encoding otherwise.
void MacroAssembler::Cvtsd2siq(Register dst, XMMRegister src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cvtsd2siq(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcvtsd2siq(dst, src);
}
// Converts the float64 at operand |src| to a 64-bit integer in |dst|, picking
// the AVX encoding when AVX is supported and the SSE encoding otherwise.
void MacroAssembler::Cvtsd2siq(Register dst, const Operand& src) {
  if (!CpuFeatures::IsSupported(AVX)) {
    cvtsd2siq(dst, src);
    return;
  }
  CpuFeatureScope avx_scope(this, AVX);
  vcvtsd2siq(dst, src);
}
void MacroAssembler::Cvttsd2siq(Register dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......@@ -2816,6 +2856,53 @@ void MacroAssembler::Roundss(XMMRegister dst, XMMRegister src,
}
// Rounds the float32 in |src| to an integral value using rounding mode |mode|
// and leaves the result in |dst|. |tmp| is clobbered on the non-SSE4.1 path.
//
// With SSE4.1 this forwards to the three-argument Roundss. Otherwise the
// rounding mode is set temporarily in MXCSR and the value is rounded by
// converting it to int64 and back.
//
// NOTE(review): the int64 round trip cannot carry the sign of zero, so an
// input that rounds to -0.0 presumably comes back as +0.0 - confirm whether
// callers depend on the sign.
void MacroAssembler::Roundss(XMMRegister dst, XMMRegister src, Register tmp,
RoundingMode mode) {
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope scope(this, SSE4_1);
Roundss(dst, src, mode);
} else {
{
// Set the right rounding mode.
// [rsp] keeps the original MXCSR value, [rsp + kPointerSize] the modified
// one.
subq(rsp, Immediate(kPointerSize * 2));
stmxcsr(Operand(rsp, 0));
movl(tmp, Operand(rsp, 0));
// Clear bits 13-14 and insert |mode| there.
andl(tmp, Immediate(0xffff9fff));
orl(tmp, Immediate(mode << 13));
movl(Operand(rsp, kPointerSize), tmp);
ldmxcsr(Operand(rsp, kPointerSize));
}
// Do rounding by conversion to int64.
Cvtss2siq(tmp, src);
Label done;
Label out_of_range;
// "tmp - 1" only overflows when tmp is INT64_MIN.
cmpq(tmp, Immediate(1));
// If the conversion results in INT64_MIN, then the input is outside
// int64 range, and due to the limited precision of float32 this means
// that the input must have been an integer already. We are therefore
// done already.
j(overflow, &out_of_range);
// Rounding is done by converting the value back to float.
Cvtqsi2ss(dst, tmp);
if (!dst.is(src)) {
// Skip the copy below; when dst aliases src no copy is emitted and falling
// through is equivalent.
jmp(&done);
}
bind(&out_of_range);
if (!dst.is(src)) {
movss(dst, src);
}
bind(&done);
// Restore the original rounding mode.
ldmxcsr(Operand(rsp, 0));
addq(rsp, Immediate(kPointerSize * 2));
}
}
void MacroAssembler::Roundsd(XMMRegister dst, XMMRegister src,
RoundingMode mode) {
if (CpuFeatures::IsSupported(AVX)) {
......@@ -2827,6 +2914,53 @@ void MacroAssembler::Roundsd(XMMRegister dst, XMMRegister src,
}
// Rounds the float64 in |src| to an integral value using rounding mode |mode|
// and leaves the result in |dst|. |tmp| is clobbered on the non-SSE4.1 path.
//
// With SSE4.1 this forwards to the three-argument Roundsd. Otherwise the
// rounding mode is set temporarily in MXCSR and the value is rounded by
// converting it to int64 and back.
//
// NOTE(review): the int64 round trip cannot carry the sign of zero, so an
// input that rounds to -0.0 presumably comes back as +0.0 - confirm whether
// callers depend on the sign.
void MacroAssembler::Roundsd(XMMRegister dst, XMMRegister src, Register tmp,
RoundingMode mode) {
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope scope(this, SSE4_1);
Roundsd(dst, src, mode);
} else {
{
// Set the right rounding mode.
// [rsp] keeps the original MXCSR value, [rsp + kPointerSize] the modified
// one.
subq(rsp, Immediate(kPointerSize * 2));
stmxcsr(Operand(rsp, 0));
movl(tmp, Operand(rsp, 0));
// Clear bits 13-14 and insert |mode| there.
andl(tmp, Immediate(0xffff9fff));
orl(tmp, Immediate(mode << 13));
movl(Operand(rsp, kPointerSize), tmp);
ldmxcsr(Operand(rsp, kPointerSize));
}
// Do rounding by conversion to int64.
Cvtsd2siq(tmp, src);
Label out_of_range;
Label done;
// "tmp - 1" only overflows when tmp is INT64_MIN.
cmpq(tmp, Immediate(1));
// If the conversion results in INT64_MIN, then the input is outside
// int64 range, and due to the limited precision of float64 this means
// that the input must have been an integer already. We are therefore
// done already.
j(overflow, &out_of_range);
// Rounding is done by converting the value back to float.
Cvtqsi2sd(dst, tmp);
if (!dst.is(src)) {
// Skip the copy below; when dst aliases src no copy is emitted and falling
// through is equivalent.
jmp(&done);
}
bind(&out_of_range);
if (!dst.is(src)) {
movsd(dst, src);
}
bind(&done);
// Restore the original rounding mode.
ldmxcsr(Operand(rsp, 0));
addq(rsp, Immediate(kPointerSize * 2));
}
}
void MacroAssembler::Sqrtsd(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......
......@@ -831,8 +831,12 @@ class MacroAssembler: public Assembler {
void Cvttsd2si(Register dst, const Operand& src);
void Cvttss2siq(Register dst, XMMRegister src);
void Cvttss2siq(Register dst, const Operand& src);
void Cvtss2siq(Register dst, XMMRegister src);
void Cvtss2siq(Register dst, const Operand& src);
void Cvttsd2siq(Register dst, XMMRegister src);
void Cvttsd2siq(Register dst, const Operand& src);
void Cvtsd2siq(Register dst, XMMRegister src);
void Cvtsd2siq(Register dst, const Operand& src);
// Move if the registers are not identical.
void Move(Register target, Register source);
......@@ -967,7 +971,11 @@ class MacroAssembler: public Assembler {
void Movmskpd(Register dst, XMMRegister src);
void Roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Roundss(XMMRegister dst, XMMRegister src, Register tmp,
RoundingMode mode);
void Roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Roundsd(XMMRegister dst, XMMRegister src, Register tmp,
RoundingMode mode);
void Sqrtsd(XMMRegister dst, XMMRegister src);
void Sqrtsd(XMMRegister dst, const Operand& src);
......
......@@ -176,49 +176,36 @@ class ValueHelper {
// Returns the list of float64 inputs used by the tests: values around the
// int32, uint32 and int64 limits, around 2^51 and 2^52, very large
// magnitudes, small fractions, both zeros, infinities and NaNs.
// NOTE(review): two `values` arrays appear below; the first one looks like
// the pre-change initializer kept by the diff view - confirm.
static std::vector<double> float64_vector() {
static const double nan = std::numeric_limits<double>::quiet_NaN();
static const double values[] = {-2e66,
-9223373136366403584.0,
-9223372036854775808.0, // INT64_MIN
-2147483649.5,
-2147483648.25,
-2147483648.0,
-2147483647.875,
-2147483647.125,
-2147483647.0,
-999.75,
-2e66,
-1.75,
-1.0,
-0.5,
-0.0,
0.0,
3e-88,
0.125,
0.25,
0.375,
0.5,
1.0,
1.25,
2,
3.1e7,
5.125,
6.25,
888,
982983.25,
2147483647.0,
2147483647.375,
2147483647.75,
2147483648.0,
2147483648.25,
2147483649.25,
9223372036854775807.0, // INT64_MAX
9223373136366403584.0,
18446744073709551615.0, // UINT64_MAX
2e66,
V8_INFINITY,
-V8_INFINITY,
-nan,
nan};
static const double values[] = {
-2e66,
-162259276829213363391578010288128.0, // -2^107
-81129638414606681695789005144064.0, // -2^106
-40564819207303349855093757313024.0, // -(2^105 + 2^53)
// The first value on the next line is -2^105 exactly.
-40564819207303340847894502572032.0, -9223373136366403584.75,
-9223373136366403584.5, -9223373136366403584.25, -9223373136366403584.0,
-9223372036854775808.0, // INT64_MIN
-4503599627370497.5,
-4503599627370496.5, // -2^52 - 0.5
-4503599627370495.5,
-2251799813685248.5, // -(2^51 + 0.5)
-34359738368.75, -34359738368.5, -34359738368.25,
-4294967295.0, // -2^32 + 1.0
-2147483649.5, -2147483648.25, -2147483648.0, -2147483647.875,
-2147483647.125, -2147483647.0, -999.75, -1.75, -1.0, -0.5, -0.0, 0.0,
3e-88, 0.125, 0.25, 0.375, 0.5, 1.0, 1.25, 2, 3.1e7, 5.125, 6.25, 888,
982983.25, 2147483647.0, 2147483647.375, 2147483647.75, 2147483648.0,
2147483648.25, 2147483649.25,
4294967295.0, // 2^32 - 1.0
34359738368.75, 34359738368.5, 34359738368.25,
2251799813685248.5, // 2^51 + 0.5
4503599627370495.5,
4503599627370496.5, // 2^52 + 0.5
4503599627370497.5,
9223372036854775807.0, // INT64_MAX
9223373136366403584.0,
18446744073709551615.0, // UINT64_MAX
// 81129638414606681695789005144064.0,
2e66, V8_INFINITY, -V8_INFINITY, -nan, nan};
return std::vector<double>(&values[0], &values[arraysize(values)]);
}
......
......@@ -2909,9 +2909,6 @@ TEST(Run_Wasm_MultipleCallIndirect) {
}
// TODO(titzer): Fix for nosee4 and re-enable.
#if 0
TEST(Run_Wasm_F32Floor) {
WasmRunner<float> r(MachineType::Float32());
BUILD(r, WASM_F32_FLOOR(WASM_GET_LOCAL(0)));
......@@ -2960,6 +2957,32 @@ TEST(Run_Wasm_F64Ceil) {
}
// Checks F64Trunc on a value that reaches the callee as parameter 17 of a
// 19-parameter f64 function. The caller passes 17 constants, then its own
// argument, then one more constant, and the result is compared against
// trunc() for every float64 test input.
// NOTE(review): per the test name, parameter 17 is presumably passed on the
// stack rather than in a register - confirm against the calling convention.
TEST(Run_WasmCallF64StackParameterTrunc) {
// Build the target function.
LocalType param_types[20];
for (int i = 0; i < 20; i++) param_types[i] = kAstF64;
FunctionSig sig(1, 19, param_types);
TestingModule module;
WasmFunctionCompiler t(&sig);
BUILD(t, WASM_F64_TRUNC(WASM_GET_LOCAL(17)));
uint32_t index = t.CompileAndAdd(&module);
// Build the calling function.
WasmRunner<double> r(MachineType::Float64());
r.env()->module = &module;
// WASM_GET_LOCAL(0) is the 18th argument, i.e. callee parameter index 17.
BUILD(r, WASM_CALL_FUNCTION(index, WASM_F64(1.0), WASM_F64(2.0),
WASM_F64(4.0), WASM_F64(8.0), WASM_F64(16.0),
WASM_F64(32.0), WASM_F64(64.0), WASM_F64(128.0),
WASM_F64(256.0), WASM_F64(1.5), WASM_F64(2.5),
WASM_F64(4.5), WASM_F64(8.5), WASM_F64(16.5),
WASM_F64(32.5), WASM_F64(64.5), WASM_F64(128.5),
WASM_GET_LOCAL(0), WASM_F64(512.5)));
FOR_FLOAT64_INPUTS(i) { CheckDoubleEq(trunc(*i), r.Call(*i)); }
}
TEST(Run_Wasm_F64Trunc) {
WasmRunner<double> r(MachineType::Float64());
BUILD(r, WASM_F64_TRUNC(WASM_GET_LOCAL(0)));
......@@ -2975,8 +2998,6 @@ TEST(Run_Wasm_F64NearestInt) {
FOR_FLOAT64_INPUTS(i) { CheckDoubleEq(nearbyint(*i), r.Call(*i)); }
}
#endif
TEST(Run_Wasm_F32Min) {
WasmRunner<float> r(MachineType::Float32(), MachineType::Float32());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment