Commit dad635ee authored by jacob.bramley, committed by Commit bot

[arm64] Use SP-offset rather than FP-offset.

A64 loads and stores can have much larger positive than negative
immediate offsets, and since most frame slots are below fp, we can
significantly improve accesses by basing them on sp instead. Typical
example:

    Before                  After
    mov x16, #-416
    str x20, [fp, x16]      str x20, [jssp, #32]

Notable benchmark results include lua_binarytrees, which improves by
about 7.5% on A57 and 5% on A53. Several other asm.js benchmarks gain
2-4%.

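As a rough illustration of the encoding argument above (a standalone sketch, not V8 code): the unscaled LDUR/STUR forms accept a signed 9-bit byte offset, while the scaled LDR/STR forms accept an unsigned 12-bit offset in units of the access size. The helpers below are simplified restatements of Assembler::IsImmLSUnscaled / IsImmLSScaled, and the sp-to-fp distance of 56 slots is a made-up value chosen to match the example.

    // Standalone sketch (not V8 code): why -416 from fp needs a scratch
    // register while +32 from sp encodes directly in the instruction.
    #include <cstdint>
    #include <cstdio>

    // Signed 9-bit byte offset accepted by the unscaled forms (LDUR/STUR).
    static bool IsImmLSUnscaled(int64_t offset) {
      return offset >= -256 && offset <= 255;
    }

    // Unsigned 12-bit offset, scaled by the access size, accepted by LDR/STR.
    // size_log2 is 3 for a 64-bit (double-word) access.
    static bool IsImmLSScaled(int64_t offset, unsigned size_log2) {
      int64_t unit = int64_t{1} << size_log2;
      return offset >= 0 && (offset % unit) == 0 && (offset / unit) < 4096;
    }

    int main() {
      const int kPointerSize = 8;
      const int fp_offset = -416;     // frame-slot offset relative to fp
      const int sp_to_fp_slots = 56;  // hypothetical sp-to-fp distance
      const int sp_offset = fp_offset + sp_to_fp_slots * kPointerSize;  // 32

      // fp-relative: fits neither encoding, so the offset is materialised in
      // a scratch register (mov x16, #-416; str x20, [fp, x16]).
      std::printf("fp %d: unscaled=%d scaled=%d\n", fp_offset,
                  IsImmLSUnscaled(fp_offset), IsImmLSScaled(fp_offset, 3));
      // sp-relative: fits the scaled form, giving str x20, [jssp, #32].
      std::printf("sp %d: unscaled=%d scaled=%d\n", sp_offset,
                  IsImmLSUnscaled(sp_offset), IsImmLSScaled(sp_offset, 3));
      return 0;
    }
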
Review URL: https://codereview.chromium.org/1376173003

Cr-Commit-Position: refs/heads/master@{#32111}
parent e44c3238
@@ -207,6 +207,15 @@ class Arm64OperandConverter final : public InstructionOperandConverter {
DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot());
FrameOffset offset =
linkage()->GetFrameOffset(AllocatedOperand::cast(op)->index(), frame());
if (offset.from_frame_pointer()) {
int from_sp =
offset.offset() + (frame()->GetSpToFpSlotCount() * kPointerSize);
// Convert FP-offsets to SP-offsets if it results in better code.
if (Assembler::IsImmLSUnscaled(from_sp) ||
Assembler::IsImmLSScaled(from_sp, LSDoubleWord)) {
offset = FrameOffset::FromStackPointer(from_sp);
}
}
return MemOperand(offset.from_stack_pointer() ? masm->StackPointer() : fp,
offset.offset());
}
@@ -477,6 +486,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ Add(target, target, Code::kHeaderSize - kHeapObjectTag);
__ Call(target);
}
frame()->ClearOutgoingParameterSlots();
RecordCallPosition(instr);
break;
}
@@ -491,6 +501,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ Add(target, target, Code::kHeaderSize - kHeapObjectTag);
__ Jump(target);
}
frame()->ClearOutgoingParameterSlots();
break;
}
case kArchCallJSFunction: {
@@ -506,6 +517,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
}
__ Ldr(x10, FieldMemOperand(func, JSFunction::kCodeEntryOffset));
__ Call(x10);
frame()->ClearOutgoingParameterSlots();
RecordCallPosition(instr);
break;
}
@@ -523,6 +535,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
AssembleDeconstructActivationRecord(stack_param_delta);
__ Ldr(x10, FieldMemOperand(func, JSFunction::kCodeEntryOffset));
__ Jump(x10);
frame()->ClearOutgoingParameterSlots();
break;
}
case kArchLazyBailout: {
@@ -545,6 +558,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
Register func = i.InputRegister(0);
__ CallCFunction(func, num_parameters, 0);
}
// CallCFunction only supports register arguments, so we never need to call
// frame()->ClearOutgoingParameterSlots() here.
DCHECK(frame()->GetOutgoingParameterSlotCount() == 0);
break;
}
case kArchJmp:
@@ -828,8 +844,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kArm64CompareAndBranch32:
// Pseudo instruction turned into cbz/cbnz in AssembleArchBranch.
break;
case kArm64Claim: {
case kArm64ClaimForCallArguments: {
__ Claim(i.InputInt32(0));
frame()->AllocateOutgoingParameterSlots(i.InputInt32(0));
break;
}
case kArm64Poke: {
@@ -1229,13 +1246,6 @@ void CodeGenerator::AssembleDeoptimizerCall(
}
// TODO(dcarney): increase stack slots in frame once before first use.
static int AlignedStackSlots(int stack_slots) {
if (stack_slots & 1) stack_slots++;
return stack_slots;
}
void CodeGenerator::AssemblePrologue() {
CallDescriptor* descriptor = linkage()->GetIncomingDescriptor();
if (descriptor->kind() == CallDescriptor::kCallAddress) {
@@ -1269,13 +1279,12 @@ void CodeGenerator::AssemblePrologue() {
stack_shrink_slots -= OsrHelper(info()).UnoptimizedFrameSlots();
}
if (stack_shrink_slots > 0) {
Register sp = __ StackPointer();
if (!sp.Is(csp)) {
__ Sub(sp, sp, stack_shrink_slots * kPointerSize);
}
__ Sub(csp, csp, AlignedStackSlots(stack_shrink_slots) * kPointerSize);
if (csp.Is(masm()->StackPointer())) {
// The system stack pointer requires 16-byte alignment at function call
// boundaries.
stack_shrink_slots += frame()->AlignSavedCalleeRegisterSlots();
}
__ Claim(stack_shrink_slots);
// Save FP registers.
CPURegList saves_fp = CPURegList(CPURegister::kFPRegister, kDRegSizeInBits,
@@ -76,7 +76,7 @@ namespace compiler {
V(Arm64TestAndBranch32) \
V(Arm64TestAndBranch) \
V(Arm64CompareAndBranch32) \
V(Arm64Claim) \
V(Arm64ClaimForCallArguments) \
V(Arm64Poke) \
V(Arm64PokePair) \
V(Arm64Float32Cmp) \
@@ -1507,7 +1507,8 @@ void InstructionSelector::EmitPrepareArguments(NodeVector* arguments,
if (aligned_push_count > 0) {
// TODO(dcarney): it would be better to bump the csp here only
// and emit paired stores with increment for non c frames.
Emit(kArm64Claim, g.NoOutput(), g.TempImmediate(aligned_push_count));
Emit(kArm64ClaimForCallArguments, g.NoOutput(),
g.TempImmediate(aligned_push_count));
}
// Move arguments to the stack.
{
@@ -14,8 +14,9 @@ namespace compiler {
Frame::Frame(int fixed_frame_size_in_slots)
: frame_slot_count_(fixed_frame_size_in_slots),
spilled_callee_register_slot_count_(0),
stack_slot_count_(0),
outgoing_parameter_slot_count_(0),
callee_saved_slot_count_(0),
spill_slot_count_(0),
allocated_registers_(NULL),
allocated_double_registers_(NULL) {}
@@ -16,15 +16,19 @@ namespace compiler {
// function. Frames are usually populated by the register allocator and are used
// by Linkage to generate code for the prologue and epilogue to compiled code.
//
// Frames are divided up into three regions. The first is the fixed header,
// which always has a constant size and can be predicted before code generation
// begins depending on the type of code being generated. The second is the
// region for spill slots, which is immediately below the fixed header and grows
// as the register allocator needs to spill to the stack and asks the frame for
// more space. The third region, which contains the callee-saved registers must
// be reserved after register allocation, since its size can only be precisely
// determined after register allocation once the number of used callee-saved
// register is certain.
// Frames are divided up into four regions.
// - The first is the fixed header, which always has a constant size and can be
// predicted before code generation begins depending on the type of code being
// generated.
// - The second is the region for spill slots, which is immediately below the
// fixed header and grows as the register allocator needs to spill to the
// stack and asks the frame for more space.
// - The third region, which contains the callee-saved registers, must be
// reserved after register allocation, since its size can only be precisely
// determined once the number of used callee-saved registers is certain.
// - The fourth region is used to pass arguments to other functions. It should
// be empty except when a call is being prepared.
//
// Every pointer in a frame has a slot id. On 32-bit platforms, doubles consume
// two slots.
@@ -35,10 +39,10 @@ namespace compiler {
// for example JSFunctions store the function context and marker in the fixed
// header, with slot index 2 corresponding to the current function context and 3
// corresponding to the frame marker/JSFunction. The frame region immediately
// below the fixed header contains spill slots starting a 4 for JsFunctions. The
// callee-saved frame region below that starts at 4+spilled_slot_count. Callee
// stack slots corresponding to parameters are accessible through negative slot
// ids.
// below the fixed header contains spill slots starting at 4 for JsFunctions.
// The callee-saved frame region below that starts at 4+spill_slot_count_.
// Callee stack slots corresponding to parameters are accessible through
// negative slot ids.
//
// Every slot of a caller or callee frame is accessible by the register
// allocator and gap resolver with a SpillSlotOperand containing its
@@ -47,50 +51,63 @@ namespace compiler {
// Below an example JSFunction Frame with slot ids, frame regions and contents:
//
// slot JS frame
// +-----------------+----------------------------
// -n-1 | parameter 0 | ^
// |- - - - - - - - -| |
// -n | | Caller
// ... | ... | frame slots
// -2 | parameter n-1 | (slot < 0)
// |- - - - - - - - -| |
// -1 | parameter n | v
// -----+-----------------+----------------------------
// 0 | return addr | ^ ^
// |- - - - - - - - -| | |
// 1 | saved frame ptr | Fixed |
// |- - - - - - - - -| Header <-- frame ptr |
// 2 | Context | | |
// |- - - - - - - - -| | |
// 3 |JSFunction/Marker| v |
// +-----------------+---- |
// 4 | spill 1 | ^ Callee
// |- - - - - - - - -| | frame slots
// ... | ... | Spill slots (slot >= 0)
// |- - - - - - - - -| | |
// m+4 | spill m | v |
// +-----------------+---- |
// m+5 | callee-saved 1 | ^ |
// |- - - - - - - - -| | |
// | ... | Callee-saved |
// |- - - - - - - - -| | |
// m+r+4 | callee-saved r | v v
// -----+-----------------+----- <-- stack ptr ---------
// +-----------------+--------------------------------
// -n-1 | parameter 0 | ^
// |- - - - - - - - -| |
// -n | | Caller
// ... | ... | frame slots
// -2 | parameter n-1 | (slot < 0)
// |- - - - - - - - -| |
// -1 | parameter n | v
// -----+-----------------+--------------------------------
// 0 | return addr | ^ ^
// |- - - - - - - - -| | |
// 1 | saved frame ptr | Fixed |
// |- - - - - - - - -| Header <-- frame ptr |
// 2 | Context | | |
// |- - - - - - - - -| | |
// 3 |JSFunction/Marker| v |
// +-----------------+---- |
// 4 | spill 1 | ^ Callee
// |- - - - - - - - -| | frame slots
// ... | ... | Spill slots (slot >= 0)
// |- - - - - - - - -| | |
// m+4 | spill m | v |
// +-----------------+---- |
// m+5 | callee-saved 1 | ^ |
// |- - - - - - - - -| | |
// | ... | Callee-saved |
// |- - - - - - - - -| | |
// m+r+4 | callee-saved r | v |
// +-----------------+---- |
// | parameter 0 | ^ |
// |- - - - - - - - -| | |
// | ... | Outgoing parameters |
// |- - - - - - - - -| | (for function calls) |
// | parameter p | v v
// -----+-----------------+----- <-- stack ptr -------------
//
class Frame : public ZoneObject {
public:
explicit Frame(int fixed_frame_size_in_slots);
inline int GetTotalFrameSlotCount() { return frame_slot_count_; }
inline int GetTotalFrameSlotCount() const { return frame_slot_count_; }
inline int GetSavedCalleeRegisterSlotCount() {
return spilled_callee_register_slot_count_;
inline int GetSpToFpSlotCount() const {
return GetTotalFrameSlotCount() -
StandardFrameConstants::kFixedSlotCountAboveFp;
}
inline int GetSpillSlotCount() { return stack_slot_count_; }
inline int GetOutgoingParameterSlotCount() const {
return outgoing_parameter_slot_count_;
}
inline int GetSavedCalleeRegisterSlotCount() const {
return callee_saved_slot_count_;
}
inline int GetSpillSlotCount() const { return spill_slot_count_; }
inline void SetElidedFrameSizeInSlots(int slots) {
DCHECK_EQ(0, spilled_callee_register_slot_count_);
DCHECK_EQ(0, stack_slot_count_);
DCHECK_EQ(0, callee_saved_slot_count_);
DCHECK_EQ(0, spill_slot_count_);
frame_slot_count_ = slots;
}
@@ -104,34 +121,47 @@ class Frame : public ZoneObject {
allocated_double_registers_ = regs;
}
bool DidAllocateDoubleRegisters() {
bool DidAllocateDoubleRegisters() const {
return !allocated_double_registers_->IsEmpty();
}
void AllocateOutgoingParameterSlots(int count) {
outgoing_parameter_slot_count_ += count;
frame_slot_count_ += count;
}
void ClearOutgoingParameterSlots() {
frame_slot_count_ -= outgoing_parameter_slot_count_;
outgoing_parameter_slot_count_ = 0;
}
int AlignSavedCalleeRegisterSlots() {
DCHECK_EQ(0, spilled_callee_register_slot_count_);
DCHECK_EQ(0, callee_saved_slot_count_);
int delta = frame_slot_count_ & 1;
frame_slot_count_ += delta;
return delta;
}
void AllocateSavedCalleeRegisterSlots(int count) {
DCHECK_EQ(0, outgoing_parameter_slot_count_);
frame_slot_count_ += count;
spilled_callee_register_slot_count_ += count;
callee_saved_slot_count_ += count;
}
int AllocateSpillSlot(int width) {
DCHECK_EQ(0, spilled_callee_register_slot_count_);
DCHECK_EQ(0, outgoing_parameter_slot_count_);
DCHECK_EQ(0, callee_saved_slot_count_);
int frame_slot_count_before = frame_slot_count_;
int slot = AllocateAlignedFrameSlot(width);
stack_slot_count_ += (frame_slot_count_ - frame_slot_count_before);
spill_slot_count_ += (frame_slot_count_ - frame_slot_count_before);
return slot;
}
int ReserveSpillSlots(size_t slot_count) {
DCHECK_EQ(0, spilled_callee_register_slot_count_);
DCHECK_EQ(0, stack_slot_count_);
stack_slot_count_ += static_cast<int>(slot_count);
DCHECK_EQ(0, outgoing_parameter_slot_count_);
DCHECK_EQ(0, callee_saved_slot_count_);
DCHECK_EQ(0, spill_slot_count_);
spill_slot_count_ += static_cast<int>(slot_count);
frame_slot_count_ += static_cast<int>(slot_count);
return frame_slot_count_ - 1;
}
@@ -153,8 +183,9 @@ class Frame : public ZoneObject {
private:
int frame_slot_count_;
int spilled_callee_register_slot_count_;
int stack_slot_count_;
int outgoing_parameter_slot_count_;
int callee_saved_slot_count_;
int spill_slot_count_;
BitVector* allocated_registers_;
BitVector* allocated_double_registers_;
@@ -162,10 +162,8 @@ FrameOffset Linkage::GetFrameOffset(int spill_slot, Frame* frame) const {
} else {
// No frame. Retrieve all parameters relative to stack pointer.
DCHECK(spill_slot < 0); // Must be a parameter.
int offsetSpToFp =
kPointerSize * (StandardFrameConstants::kFixedSlotCountAboveFp -
frame->GetTotalFrameSlotCount());
return FrameOffset::FromStackPointer(offset - offsetSpToFp);
int sp_offset = offset + (frame->GetSpToFpSlotCount() * kPointerSize);
return FrameOffset::FromStackPointer(sp_offset);
}
}
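Taken together across the hunks above, the outgoing-parameter bookkeeping works as a pair: kArm64ClaimForCallArguments allocates the slots before the arguments are poked onto the stack, and the code generator clears them again once the call has been emitted, so GetSpToFpSlotCount() (and hence the SP-relative operand conversion) continues to reflect the current frame height. Below is a minimal standalone sketch of that bookkeeping, not V8 code; FrameSketch and the slot counts are made up for illustration.

    // Standalone sketch (not V8 code) of the outgoing-parameter slot
    // bookkeeping added to Frame by this change.
    #include <cassert>

    class FrameSketch {
     public:
      explicit FrameSketch(int fixed_slots) : frame_slot_count_(fixed_slots) {}

      // Mirrors Frame::AllocateOutgoingParameterSlots, driven by
      // kArm64ClaimForCallArguments.
      void AllocateOutgoingParameterSlots(int count) {
        outgoing_parameter_slot_count_ += count;
        frame_slot_count_ += count;
      }

      // Mirrors Frame::ClearOutgoingParameterSlots, called after each call.
      void ClearOutgoingParameterSlots() {
        frame_slot_count_ -= outgoing_parameter_slot_count_;
        outgoing_parameter_slot_count_ = 0;
      }

      int frame_slot_count() const { return frame_slot_count_; }

     private:
      int frame_slot_count_;
      int outgoing_parameter_slot_count_ = 0;
    };

    int main() {
      FrameSketch frame(10);                    // hypothetical fixed + spill slots
      frame.AllocateOutgoingParameterSlots(4);  // claim before poking arguments
      assert(frame.frame_slot_count() == 14);   // sp is four slots lower here
      frame.ClearOutgoingParameterSlots();      // after the call is emitted
      assert(frame.frame_slot_count() == 10);   // back to the steady-state frame
      return 0;
    }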