Commit fa60b82b authored by alph, committed by Commit bot

[x64] Replace movaps with appropriate vmov* instructions when AVX is enabled.

BUG=v8:4406
LOG=N

Review URL: https://codereview.chromium.org/1416663004

Cr-Commit-Position: refs/heads/master@{#31391}
parent 722719fe
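
For readers skimming the diff: the change has two halves. The plain SSE encoders (movaps, movapd, movss) now DCHECK that AVX is not enabled, and MacroAssembler gains Movaps/Movss wrappers (alongside the existing Movapd) that emit the VEX-encoded vmov* form when AVX is available and fall back to the legacy encoding otherwise. A minimal, self-contained sketch of that dispatch shape follows; the free functions and the boolean flag are illustrative stand-ins, not V8's API.

```cpp
#include <cstdio>

// Illustrative stand-in for V8's CpuFeatures::IsSupported(AVX) check.
static bool avx_supported = true;

// Stand-ins for the raw encoders; the real ones emit machine code.
void movaps(int dst, int src)  { std::printf("movaps xmm%d, xmm%d\n", dst, src); }
void vmovaps(int dst, int src) { std::printf("vmovaps xmm%d, xmm%d\n", dst, src); }

// Shape of the MacroAssembler::Movaps wrapper added by this commit:
// pick the VEX-encoded instruction when AVX is on, legacy SSE otherwise.
void Movaps(int dst, int src) {
  if (avx_supported) {
    vmovaps(dst, src);
  } else {
    movaps(dst, src);
  }
}

int main() {
  Movaps(1, 2);  // prints "vmovaps xmm1, xmm2" with avx_supported == true
  return 0;
}
```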
......@@ -849,7 +849,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
ASSEMBLE_SSE_BINOP(divsd);
// Don't delete this mov. It may improve performance on some CPUs,
// when there is a (v)mulsd depending on the result.
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
__ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kSSEFloat64Mod: {
__ subq(rsp, Immediate(kDoubleSize));
......@@ -1004,7 +1004,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
ASSEMBLE_AVX_BINOP(vdivss);
// Don't delete this mov. It may improve performance on some CPUs,
// when there is a (v)mulss depending on the result.
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
__ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kAVXFloat32Max:
ASSEMBLE_AVX_BINOP(vmaxss);
......@@ -1034,7 +1034,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
ASSEMBLE_AVX_BINOP(vdivsd);
// Don't delete this mov. It may improve performance on some CPUs,
// when there is a (v)mulsd depending on the result.
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
__ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
break;
case kAVXFloat64Max:
ASSEMBLE_AVX_BINOP(vmaxsd);
......@@ -1739,7 +1739,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
XMMRegister src = g.ToDoubleRegister(source);
if (destination->IsDoubleRegister()) {
XMMRegister dst = g.ToDoubleRegister(destination);
__ movaps(dst, src);
__ Movapd(dst, src);
} else {
DCHECK(destination->IsDoubleStackSlot());
Operand dst = g.ToOperand(destination);
......@@ -1790,9 +1790,9 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
// available as a fixed scratch register.
XMMRegister src = g.ToDoubleRegister(source);
XMMRegister dst = g.ToDoubleRegister(destination);
__ movaps(xmm0, src);
__ movaps(src, dst);
__ movaps(dst, xmm0);
__ Movapd(xmm0, src);
__ Movapd(src, dst);
__ Movapd(dst, xmm0);
} else if (source->IsDoubleRegister() && destination->IsDoubleStackSlot()) {
// XMM register-memory swap. We rely on having xmm0
// available as a fixed scratch register.
......
......@@ -2744,6 +2744,7 @@ void Assembler::movsd(XMMRegister dst, const Operand& src) {
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
if (src.low_bits() == 4) {
// Try to avoid an unnecessary SIB byte.
......@@ -2772,6 +2773,7 @@ void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) {
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
if (src.low_bits() == 4) {
// Try to avoid an unnecessary SIB byte.
......@@ -2950,6 +2952,17 @@ void Assembler::ucomiss(XMMRegister dst, const Operand& src) {
}
void Assembler::movss(XMMRegister dst, XMMRegister src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit(0xF3); // single
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x10); // load
emit_sse_operand(dst, src);
}
void Assembler::movss(XMMRegister dst, const Operand& src) {
DCHECK(!IsEnabled(AVX));
EnsureSpace ensure_space(this);
......
......@@ -1008,6 +1008,13 @@ class Assembler : public AssemblerBase {
void ucomiss(XMMRegister dst, XMMRegister src);
void ucomiss(XMMRegister dst, const Operand& src);
void movaps(XMMRegister dst, XMMRegister src);
// Don't use this unless it's important to keep the
// top half of the destination register unchanged.
// Use movaps when moving float values and movd for integer
// values in xmm registers.
void movss(XMMRegister dst, XMMRegister src);
void movss(XMMRegister dst, const Operand& src);
void movss(const Operand& dst, XMMRegister src);
void shufps(XMMRegister dst, XMMRegister src, byte imm8);
......@@ -1044,7 +1051,7 @@ class Assembler : public AssemblerBase {
// Don't use this unless it's important to keep the
// top half of the destination register unchanged.
// Used movaps when moving double values and movq for integer
// Use movapd when moving double values and movq for integer
// values in xmm registers.
void movsd(XMMRegister dst, XMMRegister src);
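
The comment above captures the distinction the rest of the commit relies on: a reg-to-reg movss/movsd only replaces the low lane of the destination and leaves the upper bits alone, while movaps/movapd copy the full 128-bit register (this is also what lets the Pinsrd code further down replace its psrlq/punpckldq/movaps sequence with a single Movss). A small standalone illustration of that difference using SSE intrinsics, not V8 code:

```cpp
#include <cstdio>
#include <xmmintrin.h>  // _mm_set_ps, _mm_move_ss, _mm_storeu_ps

int main() {
  // dst = {1, 2, 3, 4}, src = {9, 9, 9, 9}; _mm_set_ps lists lane 0 last.
  __m128 dst = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
  __m128 src = _mm_set_ps(9.0f, 9.0f, 9.0f, 9.0f);

  // movss xmm_dst, xmm_src: only lane 0 is replaced, lanes 1..3 of dst survive.
  __m128 scalar_move = _mm_move_ss(dst, src);

  // movaps xmm_dst, xmm_src: the whole 128-bit register is copied.
  __m128 full_move = src;

  float s[4], f[4];
  _mm_storeu_ps(s, scalar_move);
  _mm_storeu_ps(f, full_move);
  std::printf("movss : %.0f %.0f %.0f %.0f\n", s[0], s[1], s[2], s[3]);  // 9 2 3 4
  std::printf("movaps: %.0f %.0f %.0f %.0f\n", f[0], f[1], f[2], f[3]);  // 9 9 9 9
  return 0;
}
```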
......@@ -1281,10 +1288,12 @@ class Assembler : public AssemblerBase {
void vmovq(XMMRegister dst, const Operand& src);
void vmovq(Register dst, XMMRegister src);
void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vsd(0x10, dst, src1, src2);
}
void vmovsd(XMMRegister dst, const Operand& src) {
vsd(0x10, dst, xmm0, src);
}
void vmovsd(XMMRegister dst, XMMRegister src) { vsd(0x10, dst, xmm0, src); }
void vmovsd(const Operand& dst, XMMRegister src) {
vsd(0x11, src, xmm0, dst);
}
......@@ -1413,6 +1422,9 @@ class Assembler : public AssemblerBase {
void vminss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vss(0x5d, dst, src1, src2);
}
void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vss(0x10, dst, src1, src2);
}
void vmovss(XMMRegister dst, const Operand& src) {
vss(0x10, dst, xmm0, src);
}
......@@ -1601,6 +1613,7 @@ class Assembler : public AssemblerBase {
void rorxl(Register dst, Register src, byte imm8);
void rorxl(Register dst, const Operand& src, byte imm8);
void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
void vmovmskpd(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
......
......@@ -958,11 +958,17 @@ int DisassemblerX64::AVXInstruction(byte* data) {
switch (opcode) {
case 0x10:
AppendToBuffer("vmovss %s,", NameOfXMMRegister(regop));
if (mod == 3) {
AppendToBuffer("%s,", NameOfXMMRegister(vvvv));
}
current += PrintRightXMMOperand(current);
break;
case 0x11:
AppendToBuffer("vmovss ");
current += PrintRightXMMOperand(current);
if (mod == 3) {
AppendToBuffer(",%s", NameOfXMMRegister(vvvv));
}
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x58:
......@@ -1009,11 +1015,17 @@ int DisassemblerX64::AVXInstruction(byte* data) {
switch (opcode) {
case 0x10:
AppendToBuffer("vmovsd %s,", NameOfXMMRegister(regop));
if (mod == 3) {
AppendToBuffer("%s,", NameOfXMMRegister(vvvv));
}
current += PrintRightXMMOperand(current);
break;
case 0x11:
AppendToBuffer("vmovsd ");
current += PrintRightXMMOperand(current);
if (mod == 3) {
AppendToBuffer(",%s", NameOfXMMRegister(vvvv));
}
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x2a:
......@@ -1176,6 +1188,15 @@ int DisassemblerX64::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x28:
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x29:
AppendToBuffer("vmovaps ");
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x2e:
AppendToBuffer("vucomiss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
......
......@@ -211,7 +211,7 @@ void LGapResolver::EmitMove(int index) {
} else if (source->IsDoubleRegister()) {
XMMRegister src = cgen_->ToDoubleRegister(source);
if (destination->IsDoubleRegister()) {
__ movaps(cgen_->ToDoubleRegister(destination), src);
__ Movapd(cgen_->ToDoubleRegister(destination), src);
} else {
DCHECK(destination->IsDoubleStackSlot());
__ Movsd(cgen_->ToOperand(destination), src);
......@@ -270,9 +270,9 @@ void LGapResolver::EmitSwap(int index) {
// Swap two double registers.
XMMRegister source_reg = cgen_->ToDoubleRegister(source);
XMMRegister destination_reg = cgen_->ToDoubleRegister(destination);
__ movaps(xmm0, source_reg);
__ movaps(source_reg, destination_reg);
__ movaps(destination_reg, xmm0);
__ Movapd(xmm0, source_reg);
__ Movapd(source_reg, destination_reg);
__ Movapd(destination_reg, xmm0);
} else if (source->IsDoubleRegister() || destination->IsDoubleRegister()) {
// Swap a double register and a double stack slot.
......@@ -284,9 +284,9 @@ void LGapResolver::EmitSwap(int index) {
LOperand* other = source->IsDoubleRegister() ? destination : source;
DCHECK(other->IsDoubleStackSlot());
Operand other_operand = cgen_->ToOperand(other);
__ Movsd(xmm0, other_operand);
__ Movsd(other_operand, reg);
__ movaps(reg, xmm0);
__ Movapd(xmm0, reg);
__ Movsd(reg, other_operand);
__ Movsd(other_operand, xmm0);
} else {
// No other combinations are possible.
......
......@@ -2529,6 +2529,16 @@ void MacroAssembler::Move(XMMRegister dst, uint64_t src) {
}
void MacroAssembler::Movaps(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vmovaps(dst, src);
} else {
movaps(dst, src);
}
}
void MacroAssembler::Movapd(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......@@ -2542,7 +2552,7 @@ void MacroAssembler::Movapd(XMMRegister dst, XMMRegister src) {
void MacroAssembler::Movsd(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vmovsd(dst, src);
vmovsd(dst, dst, src);
} else {
movsd(dst, src);
}
......@@ -2569,6 +2579,16 @@ void MacroAssembler::Movsd(const Operand& dst, XMMRegister src) {
}
void MacroAssembler::Movss(XMMRegister dst, XMMRegister src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vmovss(dst, dst, src);
} else {
movss(dst, src);
}
}
void MacroAssembler::Movss(XMMRegister dst, const Operand& src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
......@@ -3032,9 +3052,7 @@ void MacroAssembler::Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
punpckldq(dst, xmm0);
} else {
DCHECK_EQ(0, imm8);
psrlq(dst, 32);
punpckldq(xmm0, dst);
movaps(dst, xmm0);
Movss(dst, xmm0);
}
}
......@@ -3051,9 +3069,7 @@ void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
punpckldq(dst, xmm0);
} else {
DCHECK_EQ(0, imm8);
psrlq(dst, 32);
punpckldq(xmm0, dst);
movaps(dst, xmm0);
Movss(dst, xmm0);
}
}
......
......@@ -904,10 +904,10 @@ class MacroAssembler: public Assembler {
void Move(XMMRegister dst, float src) { Move(dst, bit_cast<uint32_t>(src)); }
void Move(XMMRegister dst, double src) { Move(dst, bit_cast<uint64_t>(src)); }
void Movapd(XMMRegister dst, XMMRegister src);
void Movsd(XMMRegister dst, XMMRegister src);
void Movsd(XMMRegister dst, const Operand& src);
void Movsd(const Operand& dst, XMMRegister src);
void Movss(XMMRegister dst, XMMRegister src);
void Movss(XMMRegister dst, const Operand& src);
void Movss(const Operand& dst, XMMRegister src);
......@@ -917,6 +917,8 @@ class MacroAssembler: public Assembler {
void Movq(XMMRegister dst, Register src);
void Movq(Register dst, XMMRegister src);
void Movaps(XMMRegister dst, XMMRegister src);
void Movapd(XMMRegister dst, XMMRegister src);
void Movmskpd(Register dst, XMMRegister src);
void Ucomiss(XMMRegister src1, XMMRegister src2);
......
......@@ -1284,7 +1284,8 @@ TEST(AssemblerX64AVX_ss) {
__ vmovd(xmm4, rdx);
__ vmovss(Operand(rsp, 0), xmm4);
__ vmovss(xmm5, Operand(rsp, 0));
__ vmovd(rcx, xmm5);
__ vmovaps(xmm6, xmm5);
__ vmovd(rcx, xmm6);
__ cmpl(rcx, rdx);
__ movl(rax, Immediate(9));
__ j(not_equal, &exit);
......@@ -1368,7 +1369,7 @@ TEST(AssemblerX64AVX_sd) {
__ vmaxsd(xmm4, xmm0, xmm1);
__ vmovsd(Operand(rsp, kDoubleSize), xmm4);
__ vmovsd(xmm5, Operand(rsp, kDoubleSize));
__ vmovsd(xmm6, xmm5);
__ vmovsd(xmm6, xmm6, xmm5);
__ vmovapd(xmm3, xmm6);
// Test vcvtss2sd & vcvtsd2ss
......
......@@ -496,6 +496,10 @@ TEST(DisasmX64) {
{
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(&assm, AVX);
__ vmovss(xmm6, xmm14, xmm2);
__ vmovss(xmm9, Operand(rbx, rcx, times_4, 10000));
__ vmovss(Operand(rbx, rcx, times_4, 10000), xmm0);
__ vaddss(xmm0, xmm1, xmm2);
__ vaddss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vmulss(xmm0, xmm1, xmm2);
......@@ -520,7 +524,7 @@ TEST(DisasmX64) {
__ vmovq(xmm9, Operand(rbx, rcx, times_4, 10000));
__ vmovq(r9, xmm6);
__ vmovsd(xmm6, xmm2);
__ vmovsd(xmm6, xmm14, xmm2);
__ vmovsd(xmm9, Operand(rbx, rcx, times_4, 10000));
__ vmovsd(Operand(rbx, rcx, times_4, 10000), xmm0);
......@@ -550,6 +554,7 @@ TEST(DisasmX64) {
__ vcvttsd2siq(rdi, xmm9);
__ vcvttsd2siq(r8, Operand(r9, rbx, times_4, 10000));
__ vmovaps(xmm10, xmm11);
__ vmovapd(xmm7, xmm0);
__ vmovmskpd(r9, xmm4);
......