Commit 26ecb4a1, authored by bbudge, committed by Commit bot

[Turbofan] Add ia32 support for 4 and 16 byte moves and swaps.

- Adds move/swap handling for 4 and 16 bytes to ia32.
- Register allocator now only requests 4 bytes for floats on ia32 and arm.
- We probably need similar support in mips.

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2027043002
Cr-Commit-Position: refs/heads/master@{#37714}
parent 49ef529b
......@@ -2012,18 +2012,44 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
} else {
DCHECK(destination->IsFPStackSlot());
Operand dst = g.ToOperand(destination);
__ movsd(dst, src);
MachineRepresentation rep =
LocationOperand::cast(source)->representation();
if (rep == MachineRepresentation::kFloat64) {
__ movsd(dst, src);
} else if (rep == MachineRepresentation::kFloat32) {
__ movss(dst, src);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
__ movups(dst, src);
}
}
} else if (source->IsFPStackSlot()) {
DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
Operand src = g.ToOperand(source);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (destination->IsFPRegister()) {
XMMRegister dst = g.ToDoubleRegister(destination);
__ movsd(dst, src);
if (rep == MachineRepresentation::kFloat64) {
__ movsd(dst, src);
} else if (rep == MachineRepresentation::kFloat32) {
__ movss(dst, src);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
__ movups(dst, src);
}
} else {
Operand dst = g.ToOperand(destination);
__ movsd(kScratchDoubleReg, src);
__ movsd(dst, kScratchDoubleReg);
if (rep == MachineRepresentation::kFloat64) {
__ movsd(kScratchDoubleReg, src);
__ movsd(dst, kScratchDoubleReg);
} else if (rep == MachineRepresentation::kFloat32) {
__ movss(kScratchDoubleReg, src);
__ movss(dst, kScratchDoubleReg);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
__ movups(kScratchDoubleReg, src);
__ movups(dst, kScratchDoubleReg);
}
}
} else {
UNREACHABLE();
......@@ -2076,21 +2102,51 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
// XMM register-memory swap.
XMMRegister reg = g.ToDoubleRegister(source);
Operand other = g.ToOperand(destination);
__ movsd(kScratchDoubleReg, other);
__ movsd(other, reg);
__ movaps(reg, kScratchDoubleReg);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep == MachineRepresentation::kFloat64) {
__ movsd(kScratchDoubleReg, other);
__ movsd(other, reg);
__ movaps(reg, kScratchDoubleReg);
} else if (rep == MachineRepresentation::kFloat32) {
__ movss(kScratchDoubleReg, other);
__ movss(other, reg);
__ movaps(reg, kScratchDoubleReg);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
__ movups(kScratchDoubleReg, other);
__ movups(other, reg);
__ movups(reg, kScratchDoubleReg);
}
} else if (source->IsFPStackSlot() && destination->IsFPStackSlot()) {
// Double-width memory-to-memory.
Operand src0 = g.ToOperand(source);
Operand src1 = g.HighOperand(source);
Operand dst0 = g.ToOperand(destination);
Operand dst1 = g.HighOperand(destination);
__ movsd(kScratchDoubleReg, dst0); // Save destination in scratch register.
__ push(src0); // Then use stack to copy source to destination.
__ pop(dst0);
__ push(src1);
__ pop(dst1);
__ movsd(src0, kScratchDoubleReg);
MachineRepresentation rep = LocationOperand::cast(source)->representation();
if (rep == MachineRepresentation::kFloat64) {
Operand src1 = g.HighOperand(source);
Operand dst1 = g.HighOperand(destination);
__ movsd(kScratchDoubleReg, dst0); // Save dst in scratch register.
__ push(src0); // Then use stack to copy src to destination.
__ pop(dst0);
__ push(src1);
__ pop(dst1);
__ movsd(src0, kScratchDoubleReg);
} else if (rep == MachineRepresentation::kFloat32) {
__ movss(kScratchDoubleReg, dst0); // Save dst in scratch register.
__ push(src0); // Then use stack to copy src to destination.
__ pop(dst0);
__ movss(src0, kScratchDoubleReg);
} else {
DCHECK_EQ(MachineRepresentation::kSimd128, rep);
// Use the XOR trick to swap without a temporary.
__ movups(kScratchDoubleReg, src0);
__ xorps(kScratchDoubleReg, dst0); // scratch contains src ^ dst.
__ movups(src0, kScratchDoubleReg);
__ xorps(kScratchDoubleReg, dst0); // scratch contains src.
__ movups(dst0, kScratchDoubleReg);
__ xorps(kScratchDoubleReg, src0); // scratch contains dst.
__ movups(src0, kScratchDoubleReg);
}
} else {
// No other combinations are possible.
UNREACHABLE();
......
......@@ -72,8 +72,8 @@ int GetByteWidth(MachineRepresentation rep) {
case MachineRepresentation::kWord16:
case MachineRepresentation::kWord32:
case MachineRepresentation::kTagged:
return kPointerSize;
case MachineRepresentation::kFloat32:
return kPointerSize;
case MachineRepresentation::kWord64:
case MachineRepresentation::kFloat64:
return 8;
......
......@@ -2401,6 +2401,26 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) {
emit_sse_operand(dst, src);
}
// Emits MOVUPS xmm, xmm: a register-to-register move with dst as the
// destination and src as the source.
//
// The load-form opcode (0F 10) is used so that the (dst, src) operand order
// handed to emit_sse_operand matches the pairing used by the
// movups(XMMRegister, const Operand&) overload. The store-form opcode (0F 11)
// is paired with (src, dst) in the movups(const Operand&, XMMRegister)
// overload and is decoded with the r/m operand as the destination, so
// combining it with (dst, src) would encode the move in the opposite
// direction.
void Assembler::movups(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0x10);
emit_sse_operand(dst, src);
}
// Emits MOVUPS xmm, m128: loads the memory operand |src| into register |dst|.
void Assembler::movups(XMMRegister dst, const Operand& src) {
EnsureSpace space_guard(this);
EMIT(0x0F);  // Two-byte opcode prefix.
EMIT(0x10);  // Load form: register <- r/m.
emit_sse_operand(dst, src);
}
// Emits MOVUPS m128, xmm: stores register |src| to the memory operand |dst|.
void Assembler::movups(const Operand& dst, XMMRegister src) {
EnsureSpace space_guard(this);
EMIT(0x0F);  // Two-byte opcode prefix.
EMIT(0x11);  // Store form: r/m <- register.
// The register goes first here because it is the non-memory operand.
emit_sse_operand(src, dst);
}
void Assembler::shufps(XMMRegister dst, XMMRegister src, byte imm8) {
DCHECK(is_uint8(imm8));
......
......@@ -959,6 +959,9 @@ class Assembler : public AssemblerBase {
void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
void ucomiss(XMMRegister dst, const Operand& src);
void movaps(XMMRegister dst, XMMRegister src);
// movups: 128-bit XMM moves in three forms -- register to register,
// memory to register (load), and register to memory (store).
void movups(XMMRegister dst, XMMRegister src);
void movups(XMMRegister dst, const Operand& src);
void movups(const Operand& dst, XMMRegister src);
void shufps(XMMRegister dst, XMMRegister src, byte imm8);
void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
......
......@@ -1422,6 +1422,20 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (f0byte == 0x10 || f0byte == 0x11) {
data += 2;
// movups xmm, xmm/m128
// movups xmm/m128, xmm
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movups ");
if (f0byte == 0x11) {
data += PrintRightXMMOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else {
AppendToBuffer("%s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
}
} else if (f0byte == 0x2e) {
data += 2;
int mod, regop, rm;
......
......@@ -390,6 +390,9 @@ TEST(DisasmIa320) {
{
// Move operation
__ movaps(xmm0, xmm1);
__ movups(xmm0, xmm1);
__ movups(xmm0, Operand(edx, 4));
__ movups(Operand(edx, 4), xmm0);
__ shufps(xmm0, xmm0, 0x0);
__ cvtsd2ss(xmm0, xmm1);
__ cvtsd2ss(xmm0, Operand(ebx, ecx, times_4, 10000));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment