Commit 433572b8 authored by bjaideep's avatar bjaideep Committed by Commit bot

PPC/s390: [turbofan]: Support using push instructions for setting up tail call parameters

Port bd0d9e7d

Original commit message:

    This optimizes the passing of stack parameters in function calls.

    For some architectures (ia32/x64), using pushes when possible instead
    of bumping the stack and then storing parameters generates much
    smaller code, and in some cases is faster (e.g. when a push of a memory
    location can implement a memory-to-memory copy and thus elide an
    intermediate load). On others (e.g. ARM), the benefit is smaller, where
    it's only possible to elide direct stack pointer adjustment in certain cases
    or combine multiple register stores into a single instruction in other limited
    situations. On yet other platforms (ARM64, MIPS), there are no push instructions,
    and this optimization isn't used at all.

    Ideally, this mechanism would be used for both tail calls and normal calls,
    but "normal" calls are currently pretty efficient, and tail calls are very
    inefficient, so this CL sets the bar low for building a new mechanism to
    handle parameter pushing that only needs to raise the bar on tail calls for now.

    The key aspect of this change is that adjustment to the stack pointer
    for tail calls (and perhaps later real calls) is an explicit step separate from
    instruction selection and gap resolution, but aware of both, making it possible
    to safely recognize gap moves that are actually pushes.

R=danno@chromium.org, joransiu@ca.ibm.com, jyan@ca.ibm.com, michael_dawson@ca.ibm.com, mbrandy@us.ibm.com

BUG=
LOG=N

Review-Url: https://codereview.chromium.org/2123983002
Cr-Commit-Position: refs/heads/master@{#37561}
parent a53bf226
......@@ -728,21 +728,7 @@ void CodeGenerator::AssembleDeconstructFrame() {
__ LeaveFrame(StackFrame::MANUAL);
}
// Adjusts sp ahead of a tail call. The slot delta is obtained from
// TailCallFrameStackSlotDelta(stack_param_delta); when it is positive, that
// many pointer-sized slots are added to sp (r0 is passed to Add as its extra
// register argument). Frame access is reset to the default in every case.
void CodeGenerator::AssembleDeconstructActivationRecord(int stack_param_delta) {
  const int slots_to_release = TailCallFrameStackSlotDelta(stack_param_delta);
  const bool has_slots_to_release = slots_to_release > 0;
  if (has_slots_to_release) {
    __ Add(sp, sp, slots_to_release * kPointerSize, r0);
  }
  frame_access_state()->SetFrameAccessToDefault();
}
void CodeGenerator::AssemblePrepareTailCall(int stack_param_delta) {
int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
if (sp_slot_delta < 0) {
__ Add(sp, sp, sp_slot_delta * kPointerSize, r0);
frame_access_state()->IncreaseSPDelta(-sp_slot_delta);
}
void CodeGenerator::AssemblePrepareTailCall() {
if (frame_access_state()->has_frame()) {
__ RestoreFrameStateForTailCall();
}
......@@ -774,6 +760,116 @@ void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
__ bind(&done);
}
namespace {
// Emits a single Push covering every register queued in |pending_pushes|
// (one, two or three registers; an empty queue emits nothing), then adds the
// number of queued registers to the SP delta tracked by |frame_access_state|
// and empties the queue. A queue longer than three hits UNREACHABLE().
void FlushPendingPushRegisters(MacroAssembler* masm,
                               FrameAccessState* frame_access_state,
                               ZoneVector<Register>* pending_pushes) {
  ZoneVector<Register>& queued = *pending_pushes;
  const auto queued_count = queued.size();
  if (queued_count == 1) {
    masm->Push(queued[0]);
  } else if (queued_count == 2) {
    masm->Push(queued[0], queued[1]);
  } else if (queued_count == 3) {
    masm->Push(queued[0], queued[1], queued[2]);
  } else if (queued_count != 0) {
    UNREACHABLE();
  }
  // Account for the registers just pushed (zero when the queue was empty).
  frame_access_state->IncreaseSPDelta(queued_count);
  queued.resize(0);
}
// Appends |reg| to |pending_pushes| and flushes the queue right away when it
// has reached three entries or when |reg| is ip.
// NOTE(review): the ip case is presumably because AssembleTailCallBeforeGap
// reloads ip for every stack-slot source, so a queued ip must be pushed
// before the next load overwrites it -- confirm.
void AddPendingPushRegister(MacroAssembler* masm,
                            FrameAccessState* frame_access_state,
                            ZoneVector<Register>* pending_pushes,
                            Register reg) {
  pending_pushes->push_back(reg);
  const bool must_flush_now = pending_pushes->size() == 3 || reg.is(ip);
  if (!must_flush_now) return;
  FlushPendingPushRegisters(masm, frame_access_state, pending_pushes);
}
// Moves sp so that the slot count above it changes from the current value
// (state->GetSPToFPSlotCount() plus
// StandardFrameConstants::kFixedSlotCountAboveFp) to |new_slot_above_sp|.
//
// A positive difference always lowers sp by that many pointer-sized slots; a
// negative difference raises sp only when |allow_shrinkage| is true. Whenever
// sp is about to be moved, registers queued in |pending_pushes| (if a queue
// was supplied) are flushed first, and the difference is added to the SP
// delta tracked by |state| afterwards. A zero difference does nothing.
void AdjustStackPointerForTailCall(
    MacroAssembler* masm, FrameAccessState* state, int new_slot_above_sp,
    ZoneVector<Register>* pending_pushes = nullptr,
    bool allow_shrinkage = true) {
  const int slots_above_sp_now =
      state->GetSPToFPSlotCount() +
      StandardFrameConstants::kFixedSlotCountAboveFp;
  const int slot_difference = new_slot_above_sp - slots_above_sp_now;

  const bool must_grow = slot_difference > 0;
  const bool may_shrink = allow_shrinkage && slot_difference < 0;
  if (!must_grow && !may_shrink) return;

  if (pending_pushes != nullptr) {
    FlushPendingPushRegisters(masm, state, pending_pushes);
  }
  // The same expression serves both directions: the sign of
  // |slot_difference| decides whether sp is lowered or raised.
  masm->Add(sp, sp, -slot_difference * kPointerSize, r0);
  state->IncreaseSPDelta(slot_difference);
}
} // namespace
// Runs before the gap moves of a tail call |instr|. Collects the moves that
// GetPushCompatibleMoves() reports for immediate and scalar pushes and, when
// the last of them targets the slot directly below |first_unused_stack_slot|,
// emits them as pushes and eliminates the corresponding gap moves. Finally
// grows the stack up to |first_unused_stack_slot| if needed (shrinking is
// disallowed at this point).
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    PPCOperandConverter g(this, instr);
    ZoneVector<Register> pending_pushes(zone());
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Registers still queued in |pending_pushes| have not been pushed yet,
      // so the target is reduced by the queue length. The length is cast to
      // int so the subtraction happens in signed arithmetic instead of
      // relying on unsigned wraparound followed by a narrowing conversion.
      AdjustStackPointerForTailCall(
          masm(), frame_access_state(),
          destination_location.index() -
              static_cast<int>(pending_pushes.size()),
          &pending_pushes);
      if (source.IsStackSlot()) {
        // Stack-slot sources are staged through ip before being queued.
        LocationOperand source_location(LocationOperand::cast(source));
        __ LoadP(ip, g.SlotToMemOperand(source_location.index()));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               source_location.GetRegister());
      } else if (source.IsImmediate()) {
        // NOTE(review): ip is queued here without any visible load of the
        // immediate's value into it -- confirm that this path pushes the
        // intended value.
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      move->Eliminate();
    }
    FlushPendingPushRegisters(masm(), frame_access_state(), &pending_pushes);
  }
  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                first_unused_stack_slot, nullptr, false);
}
// Runs after the gap moves of a tail call: adjusts sp to
// |first_unused_stack_slot| using AdjustStackPointerForTailCall's defaults
// (no pending-push queue, shrinking allowed). |instr| is not used here.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  const int target_slot = first_unused_stack_slot;
  AdjustStackPointerForTailCall(masm(), frame_access_state(), target_slot);
}
// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Instruction* instr) {
......@@ -800,8 +896,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArchTailCallCodeObjectFromJSFunction:
case kArchTailCallCodeObject: {
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
AssembleDeconstructActivationRecord(stack_param_delta);
if (opcode == kArchTailCallCodeObjectFromJSFunction) {
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
i.TempRegister(0), i.TempRegister(1),
......@@ -820,14 +914,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
DCHECK_EQ(LeaveRC, i.OutputRCBit());
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchTailCallAddress: {
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
AssembleDeconstructActivationRecord(stack_param_delta);
CHECK(!instr->InputAt(0)->IsImmediate());
__ Jump(i.InputRegister(0));
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchCallJSFunction: {
......@@ -859,8 +953,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ cmp(cp, kScratchReg);
__ Assert(eq, kWrongFunctionContext);
}
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
AssembleDeconstructActivationRecord(stack_param_delta);
if (opcode == kArchTailCallJSFunctionFromJSFunction) {
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
i.TempRegister(0), i.TempRegister(1),
......@@ -870,6 +962,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Jump(ip);
DCHECK_EQ(LeaveRC, i.OutputRCBit());
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchPrepareCallCFunction: {
......@@ -880,7 +973,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArchPrepareTailCall:
AssemblePrepareTailCall(i.InputInt32(instr->InputCount() - 1));
AssemblePrepareTailCall();
break;
case kArchCallCFunction: {
int const num_parameters = MiscField::decode(instr->opcode());
......
......@@ -580,20 +580,7 @@ void CodeGenerator::AssembleDeconstructFrame() {
__ LeaveFrame(StackFrame::MANUAL);
}
// Adjusts sp ahead of a tail call. The slot delta is obtained from
// TailCallFrameStackSlotDelta(stack_param_delta); when it is positive, that
// many pointer-sized slots are added to sp. Frame access is reset to the
// default in every case.
void CodeGenerator::AssembleDeconstructActivationRecord(int stack_param_delta) {
  const int slots_to_release = TailCallFrameStackSlotDelta(stack_param_delta);
  const bool has_slots_to_release = slots_to_release > 0;
  if (has_slots_to_release) {
    __ AddP(sp, sp, Operand(slots_to_release * kPointerSize));
  }
  frame_access_state()->SetFrameAccessToDefault();
}
void CodeGenerator::AssemblePrepareTailCall(int stack_param_delta) {
int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
if (sp_slot_delta < 0) {
__ AddP(sp, sp, Operand(sp_slot_delta * kPointerSize));
frame_access_state()->IncreaseSPDelta(-sp_slot_delta);
}
void CodeGenerator::AssemblePrepareTailCall() {
if (frame_access_state()->has_frame()) {
__ RestoreFrameStateForTailCall();
}
......@@ -625,6 +612,114 @@ void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
__ bind(&done);
}
namespace {
// Emits a single Push covering every register queued in |pending_pushes|
// (one, two or three registers; an empty queue emits nothing), then adds the
// number of queued registers to the SP delta tracked by |frame_access_state|
// and empties the queue. A queue longer than three hits UNREACHABLE().
void FlushPendingPushRegisters(MacroAssembler* masm,
                               FrameAccessState* frame_access_state,
                               ZoneVector<Register>* pending_pushes) {
  ZoneVector<Register>& queued = *pending_pushes;
  const auto queued_count = queued.size();
  if (queued_count == 1) {
    masm->Push(queued[0]);
  } else if (queued_count == 2) {
    masm->Push(queued[0], queued[1]);
  } else if (queued_count == 3) {
    masm->Push(queued[0], queued[1], queued[2]);
  } else if (queued_count != 0) {
    UNREACHABLE();
  }
  // Account for the registers just pushed (zero when the queue was empty).
  frame_access_state->IncreaseSPDelta(queued_count);
  queued.resize(0);
}
// Appends |reg| to |pending_pushes| and flushes the queue right away when it
// has reached three entries or when |reg| is ip.
// NOTE(review): the ip case is presumably because AssembleTailCallBeforeGap
// reloads ip for every stack-slot source, so a queued ip must be pushed
// before the next load overwrites it -- confirm.
void AddPendingPushRegister(MacroAssembler* masm,
                            FrameAccessState* frame_access_state,
                            ZoneVector<Register>* pending_pushes,
                            Register reg) {
  pending_pushes->push_back(reg);
  const bool must_flush_now = pending_pushes->size() == 3 || reg.is(ip);
  if (!must_flush_now) return;
  FlushPendingPushRegisters(masm, frame_access_state, pending_pushes);
}
// Moves sp so that the slot count above it changes from the current value
// (state->GetSPToFPSlotCount() plus
// StandardFrameConstants::kFixedSlotCountAboveFp) to |new_slot_above_sp|.
//
// A positive difference always lowers sp by that many pointer-sized slots; a
// negative difference raises sp only when |allow_shrinkage| is true. Whenever
// sp is about to be moved, registers queued in |pending_pushes| (if a queue
// was supplied) are flushed first, and the difference is added to the SP
// delta tracked by |state| afterwards. A zero difference does nothing.
void AdjustStackPointerForTailCall(
    MacroAssembler* masm, FrameAccessState* state, int new_slot_above_sp,
    ZoneVector<Register>* pending_pushes = nullptr,
    bool allow_shrinkage = true) {
  const int slots_above_sp_now =
      state->GetSPToFPSlotCount() +
      StandardFrameConstants::kFixedSlotCountAboveFp;
  const int slot_difference = new_slot_above_sp - slots_above_sp_now;

  const bool must_grow = slot_difference > 0;
  const bool may_shrink = allow_shrinkage && slot_difference < 0;
  if (!must_grow && !may_shrink) return;

  if (pending_pushes != nullptr) {
    FlushPendingPushRegisters(masm, state, pending_pushes);
  }
  // The same expression serves both directions: the sign of
  // |slot_difference| decides whether sp is lowered or raised.
  masm->AddP(sp, sp, Operand(-slot_difference * kPointerSize));
  state->IncreaseSPDelta(slot_difference);
}
} // namespace
// Runs before the gap moves of a tail call |instr|. Collects the moves that
// GetPushCompatibleMoves() reports for immediate and scalar pushes and, when
// the last of them targets the slot directly below |first_unused_stack_slot|,
// emits them as pushes and eliminates the corresponding gap moves. Finally
// grows the stack up to |first_unused_stack_slot| if needed (shrinking is
// disallowed at this point).
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    S390OperandConverter g(this, instr);
    ZoneVector<Register> pending_pushes(zone());
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Registers still queued in |pending_pushes| have not been pushed yet,
      // so the target is reduced by the queue length. The length is cast to
      // int so the subtraction happens in signed arithmetic instead of
      // relying on unsigned wraparound followed by a narrowing conversion.
      AdjustStackPointerForTailCall(
          masm(), frame_access_state(),
          destination_location.index() -
              static_cast<int>(pending_pushes.size()),
          &pending_pushes);
      if (source.IsStackSlot()) {
        // Stack-slot sources are staged through ip before being queued.
        LocationOperand source_location(LocationOperand::cast(source));
        __ LoadP(ip, g.SlotToMemOperand(source_location.index()));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               source_location.GetRegister());
      } else if (source.IsImmediate()) {
        // NOTE(review): ip is queued here without any visible load of the
        // immediate's value into it -- confirm that this path pushes the
        // intended value.
        AddPendingPushRegister(masm(), frame_access_state(), &pending_pushes,
                               ip);
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      move->Eliminate();
    }
    FlushPendingPushRegisters(masm(), frame_access_state(), &pending_pushes);
  }
  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                first_unused_stack_slot, nullptr, false);
}
// Runs after the gap moves of a tail call: adjusts sp to
// |first_unused_stack_slot| using AdjustStackPointerForTailCall's defaults
// (no pending-push queue, shrinking allowed). |instr| is not used here.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  const int target_slot = first_unused_stack_slot;
  AdjustStackPointerForTailCall(masm(), frame_access_state(), target_slot);
}
// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Instruction* instr) {
......@@ -648,8 +743,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArchTailCallCodeObjectFromJSFunction:
case kArchTailCallCodeObject: {
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
AssembleDeconstructActivationRecord(stack_param_delta);
if (opcode == kArchTailCallCodeObjectFromJSFunction) {
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
i.TempRegister(0), i.TempRegister(1),
......@@ -667,14 +760,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
RelocInfo::CODE_TARGET);
}
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchTailCallAddress: {
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
AssembleDeconstructActivationRecord(stack_param_delta);
CHECK(!instr->InputAt(0)->IsImmediate());
__ Jump(i.InputRegister(0));
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchCallJSFunction: {
......@@ -703,8 +796,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ CmpP(cp, kScratchReg);
__ Assert(eq, kWrongFunctionContext);
}
int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
AssembleDeconstructActivationRecord(stack_param_delta);
if (opcode == kArchTailCallJSFunctionFromJSFunction) {
AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
i.TempRegister(0), i.TempRegister(1),
......@@ -713,6 +804,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ LoadP(ip, FieldMemOperand(func, JSFunction::kCodeEntryOffset));
__ Jump(ip);
frame_access_state()->ClearSPDelta();
frame_access_state()->SetFrameAccessToDefault();
break;
}
case kArchPrepareCallCFunction: {
......@@ -723,7 +815,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArchPrepareTailCall:
AssemblePrepareTailCall(i.InputInt32(instr->InputCount() - 1));
AssemblePrepareTailCall();
break;
case kArchCallCFunction: {
int const num_parameters = MiscField::decode(instr->opcode());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment