Commit dad635ee authored by jacob.bramley, committed by Commit bot

[arm64] Use SP-offset rather than FP-offset.

A64 loads and stores can have much larger positive than negative
immediate offsets, and since most frame slots are below fp, we can
significantly improve accesses by basing them on sp instead. Typical
example:

    Before                  After
    mov x16, #-416
    str x20, [fp, x16]      str x20, [jssp, #32]

Notable benchmark results include lua_binarytrees, which improves by
about 7.5% on A57 and 5% on A53. Several other asm.js benchmarks gain
2-4%.

Review URL: https://codereview.chromium.org/1376173003

Cr-Commit-Position: refs/heads/master@{#32111}
parent e44c3238
...@@ -207,6 +207,15 @@ class Arm64OperandConverter final : public InstructionOperandConverter { ...@@ -207,6 +207,15 @@ class Arm64OperandConverter final : public InstructionOperandConverter {
DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot()); DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot());
FrameOffset offset = FrameOffset offset =
linkage()->GetFrameOffset(AllocatedOperand::cast(op)->index(), frame()); linkage()->GetFrameOffset(AllocatedOperand::cast(op)->index(), frame());
if (offset.from_frame_pointer()) {
int from_sp =
offset.offset() + (frame()->GetSpToFpSlotCount() * kPointerSize);
// Convert FP-offsets to SP-offsets if it results in better code.
if (Assembler::IsImmLSUnscaled(from_sp) ||
Assembler::IsImmLSScaled(from_sp, LSDoubleWord)) {
offset = FrameOffset::FromStackPointer(from_sp);
}
}
return MemOperand(offset.from_stack_pointer() ? masm->StackPointer() : fp, return MemOperand(offset.from_stack_pointer() ? masm->StackPointer() : fp,
offset.offset()); offset.offset());
} }
...@@ -477,6 +486,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ...@@ -477,6 +486,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ Add(target, target, Code::kHeaderSize - kHeapObjectTag); __ Add(target, target, Code::kHeaderSize - kHeapObjectTag);
__ Call(target); __ Call(target);
} }
frame()->ClearOutgoingParameterSlots();
RecordCallPosition(instr); RecordCallPosition(instr);
break; break;
} }
...@@ -491,6 +501,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ...@@ -491,6 +501,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ Add(target, target, Code::kHeaderSize - kHeapObjectTag); __ Add(target, target, Code::kHeaderSize - kHeapObjectTag);
__ Jump(target); __ Jump(target);
} }
frame()->ClearOutgoingParameterSlots();
break; break;
} }
case kArchCallJSFunction: { case kArchCallJSFunction: {
...@@ -506,6 +517,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ...@@ -506,6 +517,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
} }
__ Ldr(x10, FieldMemOperand(func, JSFunction::kCodeEntryOffset)); __ Ldr(x10, FieldMemOperand(func, JSFunction::kCodeEntryOffset));
__ Call(x10); __ Call(x10);
frame()->ClearOutgoingParameterSlots();
RecordCallPosition(instr); RecordCallPosition(instr);
break; break;
} }
...@@ -523,6 +535,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ...@@ -523,6 +535,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
AssembleDeconstructActivationRecord(stack_param_delta); AssembleDeconstructActivationRecord(stack_param_delta);
__ Ldr(x10, FieldMemOperand(func, JSFunction::kCodeEntryOffset)); __ Ldr(x10, FieldMemOperand(func, JSFunction::kCodeEntryOffset));
__ Jump(x10); __ Jump(x10);
frame()->ClearOutgoingParameterSlots();
break; break;
} }
case kArchLazyBailout: { case kArchLazyBailout: {
...@@ -545,6 +558,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ...@@ -545,6 +558,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
Register func = i.InputRegister(0); Register func = i.InputRegister(0);
__ CallCFunction(func, num_parameters, 0); __ CallCFunction(func, num_parameters, 0);
} }
// CallCFunction only supports register arguments so we never need to call
// frame()->ClearOutgoingParameterSlots() here.
DCHECK(frame()->GetOutgoingParameterSlotCount() == 0);
break; break;
} }
case kArchJmp: case kArchJmp:
...@@ -828,8 +844,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ...@@ -828,8 +844,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kArm64CompareAndBranch32: case kArm64CompareAndBranch32:
// Pseudo instruction turned into cbz/cbnz in AssembleArchBranch. // Pseudo instruction turned into cbz/cbnz in AssembleArchBranch.
break; break;
case kArm64Claim: { case kArm64ClaimForCallArguments: {
__ Claim(i.InputInt32(0)); __ Claim(i.InputInt32(0));
frame()->AllocateOutgoingParameterSlots(i.InputInt32(0));
break; break;
} }
case kArm64Poke: { case kArm64Poke: {
...@@ -1229,13 +1246,6 @@ void CodeGenerator::AssembleDeoptimizerCall( ...@@ -1229,13 +1246,6 @@ void CodeGenerator::AssembleDeoptimizerCall(
} }
// TODO(dcarney): increase stack slots in frame once before first use.
static int AlignedStackSlots(int stack_slots) {
if (stack_slots & 1) stack_slots++;
return stack_slots;
}
void CodeGenerator::AssemblePrologue() { void CodeGenerator::AssemblePrologue() {
CallDescriptor* descriptor = linkage()->GetIncomingDescriptor(); CallDescriptor* descriptor = linkage()->GetIncomingDescriptor();
if (descriptor->kind() == CallDescriptor::kCallAddress) { if (descriptor->kind() == CallDescriptor::kCallAddress) {
...@@ -1269,13 +1279,12 @@ void CodeGenerator::AssemblePrologue() { ...@@ -1269,13 +1279,12 @@ void CodeGenerator::AssemblePrologue() {
stack_shrink_slots -= OsrHelper(info()).UnoptimizedFrameSlots(); stack_shrink_slots -= OsrHelper(info()).UnoptimizedFrameSlots();
} }
if (stack_shrink_slots > 0) { if (csp.Is(masm()->StackPointer())) {
Register sp = __ StackPointer(); // The system stack pointer requires 16-byte alignment at function call
if (!sp.Is(csp)) { // boundaries.
__ Sub(sp, sp, stack_shrink_slots * kPointerSize); stack_shrink_slots += frame()->AlignSavedCalleeRegisterSlots();
}
__ Sub(csp, csp, AlignedStackSlots(stack_shrink_slots) * kPointerSize);
} }
__ Claim(stack_shrink_slots);
// Save FP registers. // Save FP registers.
CPURegList saves_fp = CPURegList(CPURegister::kFPRegister, kDRegSizeInBits, CPURegList saves_fp = CPURegList(CPURegister::kFPRegister, kDRegSizeInBits,
......
...@@ -76,7 +76,7 @@ namespace compiler { ...@@ -76,7 +76,7 @@ namespace compiler {
V(Arm64TestAndBranch32) \ V(Arm64TestAndBranch32) \
V(Arm64TestAndBranch) \ V(Arm64TestAndBranch) \
V(Arm64CompareAndBranch32) \ V(Arm64CompareAndBranch32) \
V(Arm64Claim) \ V(Arm64ClaimForCallArguments) \
V(Arm64Poke) \ V(Arm64Poke) \
V(Arm64PokePair) \ V(Arm64PokePair) \
V(Arm64Float32Cmp) \ V(Arm64Float32Cmp) \
......
...@@ -1507,7 +1507,8 @@ void InstructionSelector::EmitPrepareArguments(NodeVector* arguments, ...@@ -1507,7 +1507,8 @@ void InstructionSelector::EmitPrepareArguments(NodeVector* arguments,
if (aligned_push_count > 0) { if (aligned_push_count > 0) {
// TODO(dcarney): it would be better to bump the csp here only // TODO(dcarney): it would be better to bump the csp here only
// and emit paired stores with increment for non c frames. // and emit paired stores with increment for non c frames.
Emit(kArm64Claim, g.NoOutput(), g.TempImmediate(aligned_push_count)); Emit(kArm64ClaimForCallArguments, g.NoOutput(),
g.TempImmediate(aligned_push_count));
} }
// Move arguments to the stack. // Move arguments to the stack.
{ {
......
...@@ -14,8 +14,9 @@ namespace compiler { ...@@ -14,8 +14,9 @@ namespace compiler {
Frame::Frame(int fixed_frame_size_in_slots) Frame::Frame(int fixed_frame_size_in_slots)
: frame_slot_count_(fixed_frame_size_in_slots), : frame_slot_count_(fixed_frame_size_in_slots),
spilled_callee_register_slot_count_(0), outgoing_parameter_slot_count_(0),
stack_slot_count_(0), callee_saved_slot_count_(0),
spill_slot_count_(0),
allocated_registers_(NULL), allocated_registers_(NULL),
allocated_double_registers_(NULL) {} allocated_double_registers_(NULL) {}
......
...@@ -16,15 +16,19 @@ namespace compiler { ...@@ -16,15 +16,19 @@ namespace compiler {
// function. Frames are usually populated by the register allocator and are used // function. Frames are usually populated by the register allocator and are used
// by Linkage to generate code for the prologue and epilogue to compiled code. // by Linkage to generate code for the prologue and epilogue to compiled code.
// //
// Frames are divided up into three regions. The first is the fixed header, // Frames are divided up into four regions.
// which always has a constant size and can be predicted before code generation // - The first is the fixed header, which always has a constant size and can be
// begins depending on the type of code being generated. The second is the // predicted before code generation begins depending on the type of code being
// region for spill slots, which is immediately below the fixed header and grows // generated.
// as the register allocator needs to spill to the stack and asks the frame for // - The second is the region for spill slots, which is immediately below the
// more space. The third region, which contains the callee-saved registers must // fixed header and grows as the register allocator needs to spill to the
// be reserved after register allocation, since its size can only be precisely // stack and asks the frame for more space.
// determined after register allocation once the number of used callee-saved // - The third region, which contains the callee-saved registers must be
// register is certain. // reserved after register allocation, since its size can only be precisely
// determined after register allocation once the number of used callee-saved
// register is certain.
// - The fourth region is used to pass arguments to other functions. It should
// be empty except when a call is being prepared.
// //
// Every pointer in a frame has a slot id. On 32-bit platforms, doubles consume // Every pointer in a frame has a slot id. On 32-bit platforms, doubles consume
// two slots. // two slots.
...@@ -35,10 +39,10 @@ namespace compiler { ...@@ -35,10 +39,10 @@ namespace compiler {
// for example JSFunctions store the function context and marker in the fixed // for example JSFunctions store the function context and marker in the fixed
// header, with slot index 2 corresponding to the current function context and 3 // header, with slot index 2 corresponding to the current function context and 3
// corresponding to the frame marker/JSFunction. The frame region immediately // corresponding to the frame marker/JSFunction. The frame region immediately
// below the fixed header contains spill slots starting a 4 for JsFunctions. The // below the fixed header contains spill slots starting at 4 for JsFunctions.
// callee-saved frame region below that starts at 4+spilled_slot_count. Callee // The callee-saved frame region below that starts at 4+spill_slot_count_.
// stack slots corresponding to parameters are accessible through negative slot // Callee stack slots corresponding to parameters are accessible through
// ids. // negative slot ids.
// //
// Every slot of a caller or callee frame is accessible by the register // Every slot of a caller or callee frame is accessible by the register
// allocator and gap resolver with a SpillSlotOperand containing its // allocator and gap resolver with a SpillSlotOperand containing its
...@@ -47,50 +51,63 @@ namespace compiler { ...@@ -47,50 +51,63 @@ namespace compiler {
// Below an example JSFunction Frame with slot ids, frame regions and contents: // Below an example JSFunction Frame with slot ids, frame regions and contents:
// //
// slot JS frame // slot JS frame
// +-----------------+---------------------------- // +-----------------+--------------------------------
// -n-1 | parameter 0 | ^ // -n-1 | parameter 0 | ^
// |- - - - - - - - -| | // |- - - - - - - - -| |
// -n | | Caller // -n | | Caller
// ... | ... | frame slots // ... | ... | frame slots
// -2 | parameter n-1 | (slot < 0) // -2 | parameter n-1 | (slot < 0)
// |- - - - - - - - -| | // |- - - - - - - - -| |
// -1 | parameter n | v // -1 | parameter n | v
// -----+-----------------+---------------------------- // -----+-----------------+--------------------------------
// 0 | return addr | ^ ^ // 0 | return addr | ^ ^
// |- - - - - - - - -| | | // |- - - - - - - - -| | |
// 1 | saved frame ptr | Fixed | // 1 | saved frame ptr | Fixed |
// |- - - - - - - - -| Header <-- frame ptr | // |- - - - - - - - -| Header <-- frame ptr |
// 2 | Context | | | // 2 | Context | | |
// |- - - - - - - - -| | | // |- - - - - - - - -| | |
// 3 |JSFunction/Marker| v | // 3 |JSFunction/Marker| v |
// +-----------------+---- | // +-----------------+---- |
// 4 | spill 1 | ^ Callee // 4 | spill 1 | ^ Callee
// |- - - - - - - - -| | frame slots // |- - - - - - - - -| | frame slots
// ... | ... | Spill slots (slot >= 0) // ... | ... | Spill slots (slot >= 0)
// |- - - - - - - - -| | | // |- - - - - - - - -| | |
// m+4 | spill m | v | // m+4 | spill m | v |
// +-----------------+---- | // +-----------------+---- |
// m+5 | callee-saved 1 | ^ | // m+5 | callee-saved 1 | ^ |
// |- - - - - - - - -| | | // |- - - - - - - - -| | |
// | ... | Callee-saved | // | ... | Callee-saved |
// |- - - - - - - - -| | | // |- - - - - - - - -| | |
// m+r+4 | callee-saved r | v v // m+r+4 | callee-saved r | v |
// -----+-----------------+----- <-- stack ptr --------- // +-----------------+---- |
// | parameter 0 | ^ |
// |- - - - - - - - -| | |
// | ... | Outgoing parameters |
// |- - - - - - - - -| | (for function calls) |
// | parameter p | v v
// -----+-----------------+----- <-- stack ptr -------------
// //
class Frame : public ZoneObject { class Frame : public ZoneObject {
public: public:
explicit Frame(int fixed_frame_size_in_slots); explicit Frame(int fixed_frame_size_in_slots);
inline int GetTotalFrameSlotCount() { return frame_slot_count_; } inline int GetTotalFrameSlotCount() const { return frame_slot_count_; }
inline int GetSavedCalleeRegisterSlotCount() { inline int GetSpToFpSlotCount() const {
return spilled_callee_register_slot_count_; return GetTotalFrameSlotCount() -
StandardFrameConstants::kFixedSlotCountAboveFp;
} }
inline int GetSpillSlotCount() { return stack_slot_count_; } inline int GetOutgoingParameterSlotCount() const {
return outgoing_parameter_slot_count_;
}
inline int GetSavedCalleeRegisterSlotCount() const {
return callee_saved_slot_count_;
}
inline int GetSpillSlotCount() const { return spill_slot_count_; }
inline void SetElidedFrameSizeInSlots(int slots) { inline void SetElidedFrameSizeInSlots(int slots) {
DCHECK_EQ(0, spilled_callee_register_slot_count_); DCHECK_EQ(0, callee_saved_slot_count_);
DCHECK_EQ(0, stack_slot_count_); DCHECK_EQ(0, spill_slot_count_);
frame_slot_count_ = slots; frame_slot_count_ = slots;
} }
...@@ -104,34 +121,47 @@ class Frame : public ZoneObject { ...@@ -104,34 +121,47 @@ class Frame : public ZoneObject {
allocated_double_registers_ = regs; allocated_double_registers_ = regs;
} }
bool DidAllocateDoubleRegisters() { bool DidAllocateDoubleRegisters() const {
return !allocated_double_registers_->IsEmpty(); return !allocated_double_registers_->IsEmpty();
} }
void AllocateOutgoingParameterSlots(int count) {
outgoing_parameter_slot_count_ += count;
frame_slot_count_ += count;
}
void ClearOutgoingParameterSlots() {
frame_slot_count_ -= outgoing_parameter_slot_count_;
outgoing_parameter_slot_count_ = 0;
}
int AlignSavedCalleeRegisterSlots() { int AlignSavedCalleeRegisterSlots() {
DCHECK_EQ(0, spilled_callee_register_slot_count_); DCHECK_EQ(0, callee_saved_slot_count_);
int delta = frame_slot_count_ & 1; int delta = frame_slot_count_ & 1;
frame_slot_count_ += delta; frame_slot_count_ += delta;
return delta; return delta;
} }
void AllocateSavedCalleeRegisterSlots(int count) { void AllocateSavedCalleeRegisterSlots(int count) {
DCHECK_EQ(0, outgoing_parameter_slot_count_);
frame_slot_count_ += count; frame_slot_count_ += count;
spilled_callee_register_slot_count_ += count; callee_saved_slot_count_ += count;
} }
int AllocateSpillSlot(int width) { int AllocateSpillSlot(int width) {
DCHECK_EQ(0, spilled_callee_register_slot_count_); DCHECK_EQ(0, outgoing_parameter_slot_count_);
DCHECK_EQ(0, callee_saved_slot_count_);
int frame_slot_count_before = frame_slot_count_; int frame_slot_count_before = frame_slot_count_;
int slot = AllocateAlignedFrameSlot(width); int slot = AllocateAlignedFrameSlot(width);
stack_slot_count_ += (frame_slot_count_ - frame_slot_count_before); spill_slot_count_ += (frame_slot_count_ - frame_slot_count_before);
return slot; return slot;
} }
int ReserveSpillSlots(size_t slot_count) { int ReserveSpillSlots(size_t slot_count) {
DCHECK_EQ(0, spilled_callee_register_slot_count_); DCHECK_EQ(0, outgoing_parameter_slot_count_);
DCHECK_EQ(0, stack_slot_count_); DCHECK_EQ(0, callee_saved_slot_count_);
stack_slot_count_ += static_cast<int>(slot_count); DCHECK_EQ(0, spill_slot_count_);
spill_slot_count_ += static_cast<int>(slot_count);
frame_slot_count_ += static_cast<int>(slot_count); frame_slot_count_ += static_cast<int>(slot_count);
return frame_slot_count_ - 1; return frame_slot_count_ - 1;
} }
...@@ -153,8 +183,9 @@ class Frame : public ZoneObject { ...@@ -153,8 +183,9 @@ class Frame : public ZoneObject {
private: private:
int frame_slot_count_; int frame_slot_count_;
int spilled_callee_register_slot_count_; int outgoing_parameter_slot_count_;
int stack_slot_count_; int callee_saved_slot_count_;
int spill_slot_count_;
BitVector* allocated_registers_; BitVector* allocated_registers_;
BitVector* allocated_double_registers_; BitVector* allocated_double_registers_;
......
...@@ -162,10 +162,8 @@ FrameOffset Linkage::GetFrameOffset(int spill_slot, Frame* frame) const { ...@@ -162,10 +162,8 @@ FrameOffset Linkage::GetFrameOffset(int spill_slot, Frame* frame) const {
} else { } else {
// No frame. Retrieve all parameters relative to stack pointer. // No frame. Retrieve all parameters relative to stack pointer.
DCHECK(spill_slot < 0); // Must be a parameter. DCHECK(spill_slot < 0); // Must be a parameter.
int offsetSpToFp = int sp_offset = offset + (frame->GetSpToFpSlotCount() * kPointerSize);
kPointerSize * (StandardFrameConstants::kFixedSlotCountAboveFp - return FrameOffset::FromStackPointer(sp_offset);
frame->GetTotalFrameSlotCount());
return FrameOffset::FromStackPointer(offset - offsetSpToFp);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment