Commit e5b93bd5 authored by Jaideep Bajwa, committed by Commit Bot

PPC/s390: [Interpreter] Adapt Call bytecode handlers to drop their stack-frame.

Port 51a15140

Original Commit Message:

    This change adapts the Call bytecode handlers such that they don't require
    a stack frame. It does this by modifying the call bytecode handler to
    tail-call the Call or InterpreterPushArgsAndCall builtins. As a result, the
    callee function will return to the InterpreterEntryTrampoline when it returns
    (since this is the return address on the interpreter frame), which is
    adapted to dispatch to the next bytecode handler. The return bytecode
    handler is modified to tail-call a new InterpreterExitTrampoline instead
    of returning to the InterpreterEntryTrampoline.

    Overall this significantly reduces the amount of stack space required for
    interpreter frames, increasing the maximum depth of recursive calls from
    around 6000 to around 12,500 on x64.

R=rmcilroy@chromium.org, joransiu@ca.ibm.com, jyan@ca.ibm.com, michael_dawson@ca.ibm.com
BUG=chromium:753705
LOG=N

Change-Id: Ieac490d82098c13741080061eda762d54baf8c04
Reviewed-on: https://chromium-review.googlesource.com/639315
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Jaideep Bajwa <bjaideep@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#47694}
parent a861ebb7
...@@ -1140,6 +1140,52 @@ static void MaybeTailCallOptimizedCodeSlot(MacroAssembler* masm, ...@@ -1140,6 +1140,52 @@ static void MaybeTailCallOptimizedCodeSlot(MacroAssembler* masm,
__ bind(&fallthrough); __ bind(&fallthrough);
} }
// Advances |bytecode_offset| past the bytecode it currently points at. This
// simulates what all bytecode handlers do upon completion of the underlying
// operation.
//
//   bytecode_array:  base register the bytecodes are loaded from (only read).
//   bytecode_offset: offset of the current bytecode; updated in place to the
//                    offset of the following bytecode.
//   scratch1:        clobbered; holds the address of the bytecode size table.
//   scratch2:        clobbered; holds the bytecode, then its byte offset into
//                    the size table, and finally its size.
//
// A Wide or ExtraWide prefix bytecode is stepped over first, and the size of
// the prefixed bytecode is then read from the matching scaled size table.
// The four registers must not alias each other.
static void AdvanceBytecodeOffset(MacroAssembler* masm, Register bytecode_array,
                                  Register bytecode_offset, Register scratch1,
                                  Register scratch2) {
  Register bytecode_size_table = scratch1;
  Register bytecode = scratch2;
  DCHECK(!AreAliased(bytecode_array, bytecode_offset, bytecode_size_table,
                     bytecode));

  __ mov(
      bytecode_size_table,
      Operand(ExternalReference::bytecode_size_table_address(masm->isolate())));

  // Load the current bytecode.
  __ lbzx(bytecode, MemOperand(bytecode_array, bytecode_offset));

  // Check if the bytecode is a Wide or ExtraWide prefix bytecode. Both
  // prefixes sort before every other bytecode, so a single compare against 1
  // separates "not a prefix" (>1), ExtraWide (==1) and Wide (<1).
  Label load_size, extra_wide;
  STATIC_ASSERT(0 == static_cast<int>(interpreter::Bytecode::kWide));
  STATIC_ASSERT(1 == static_cast<int>(interpreter::Bytecode::kExtraWide));
  __ cmpi(bytecode, Operand(0x1));
  __ bgt(&load_size);
  __ beq(&extra_wide);

  // Wide prefix: load the next bytecode and update table to the wide scaled
  // table.
  __ addi(bytecode_offset, bytecode_offset, Operand(1));
  __ lbzx(bytecode, MemOperand(bytecode_array, bytecode_offset));
  __ addi(bytecode_size_table, bytecode_size_table,
          Operand(kIntSize * interpreter::Bytecodes::kBytecodeCount));
  __ b(&load_size);

  __ bind(&extra_wide);
  // ExtraWide prefix: load the next bytecode and update table to the extra
  // wide scaled table.
  __ addi(bytecode_offset, bytecode_offset, Operand(1));
  __ lbzx(bytecode, MemOperand(bytecode_array, bytecode_offset));
  __ addi(bytecode_size_table, bytecode_size_table,
          Operand(2 * kIntSize * interpreter::Bytecodes::kBytecodeCount));
  // Fall through to load_size; no branch is needed because the label is bound
  // on the next instruction.

  // Load the size of the current bytecode.
  __ bind(&load_size);
  // The tables are laid out kIntSize * kBytecodeCount bytes apart and read
  // with a word load, so the index is scaled by kIntSize (a shift by 2).
  STATIC_ASSERT(kIntSize == 4);
  __ ShiftLeftImm(scratch2, bytecode, Operand(2));
  __ lwzx(scratch2, MemOperand(bytecode_size_table, scratch2));
  __ add(bytecode_offset, bytecode_offset, scratch2);
}
// Generate code for entering a JS function with the interpreter. // Generate code for entering a JS function with the interpreter.
// On entry to the function the receiver and arguments have been pushed on the // On entry to the function the receiver and arguments have been pushed on the
// stack left to right. The actual argument count matches the formal parameter // stack left to right. The actual argument count matches the formal parameter
...@@ -1263,13 +1309,15 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ...@@ -1263,13 +1309,15 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
__ StorePX(r6, MemOperand(fp, r8)); __ StorePX(r6, MemOperand(fp, r8));
__ bind(&no_incoming_new_target_or_generator_register); __ bind(&no_incoming_new_target_or_generator_register);
// Load accumulator and dispatch table into registers. // Load accumulator with undefined.
__ LoadRoot(kInterpreterAccumulatorRegister, Heap::kUndefinedValueRootIndex); __ LoadRoot(kInterpreterAccumulatorRegister, Heap::kUndefinedValueRootIndex);
// Load the dispatch table into a register and dispatch to the bytecode
// handler at the current bytecode offset.
Label do_dispatch;
__ bind(&do_dispatch);
__ mov(kInterpreterDispatchTableRegister, __ mov(kInterpreterDispatchTableRegister,
Operand(ExternalReference::interpreter_dispatch_table_address( Operand(ExternalReference::interpreter_dispatch_table_address(
masm->isolate()))); masm->isolate())));
// Dispatch to the first bytecode handler for the function.
__ lbzx(r4, MemOperand(kInterpreterBytecodeArrayRegister, __ lbzx(r4, MemOperand(kInterpreterBytecodeArrayRegister,
kInterpreterBytecodeOffsetRegister)); kInterpreterBytecodeOffsetRegister));
__ ShiftLeftImm(ip, r4, Operand(kPointerSizeLog2)); __ ShiftLeftImm(ip, r4, Operand(kPointerSizeLog2));
...@@ -1278,9 +1326,17 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ...@@ -1278,9 +1326,17 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
masm->isolate()->heap()->SetInterpreterEntryReturnPCOffset(masm->pc_offset()); masm->isolate()->heap()->SetInterpreterEntryReturnPCOffset(masm->pc_offset());
// The return value is in r3. // Get bytecode array and bytecode offset from the stack frame.
LeaveInterpreterFrame(masm, r5); __ LoadP(kInterpreterBytecodeArrayRegister,
__ blr(); MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp));
__ LoadP(kInterpreterBytecodeOffsetRegister,
MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp));
__ SmiUntag(kInterpreterBytecodeOffsetRegister);
// Advance to the next bytecode and dispatch.
AdvanceBytecodeOffset(masm, kInterpreterBytecodeArrayRegister,
kInterpreterBytecodeOffsetRegister, r4, r5);
__ b(&do_dispatch);
// Load debug copy of the bytecode array if it exists. // Load debug copy of the bytecode array if it exists.
// kInterpreterBytecodeArrayRegister is already loaded with // kInterpreterBytecodeArrayRegister is already loaded with
...@@ -1297,6 +1353,12 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ...@@ -1297,6 +1353,12 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
__ b(&bytecode_array_loaded); __ b(&bytecode_array_loaded);
} }
// Generates the InterpreterExitTrampoline builtin. The return bytecode handler
// tail-calls this builtin instead of returning to the
// InterpreterEntryTrampoline; it tears down the interpreter frame and returns.
void Builtins::Generate_InterpreterExitTrampoline(MacroAssembler* masm) {
// The return value is in r3.
// NOTE(review): r5 is presumably a scratch register for
// LeaveInterpreterFrame - confirm against its definition.
LeaveInterpreterFrame(masm, r5);
// Return through the link register.
__ blr();
}
static void Generate_StackOverflowCheck(MacroAssembler* masm, Register num_args, static void Generate_StackOverflowCheck(MacroAssembler* masm, Register num_args,
Register scratch, Register scratch,
Label* stack_overflow) { Label* stack_overflow) {
...@@ -1479,20 +1541,19 @@ static void Generate_InterpreterEnterBytecode(MacroAssembler* masm) { ...@@ -1479,20 +1541,19 @@ static void Generate_InterpreterEnterBytecode(MacroAssembler* masm) {
} }
void Builtins::Generate_InterpreterEnterBytecodeAdvance(MacroAssembler* masm) { void Builtins::Generate_InterpreterEnterBytecodeAdvance(MacroAssembler* masm) {
// Advance the current bytecode offset stored within the given interpreter // Get bytecode array and bytecode offset from the stack frame.
// stack frame. This simulates what all bytecode handlers do upon completion __ LoadP(kInterpreterBytecodeArrayRegister,
// of the underlying operation. MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp));
__ LoadP(r4, MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); __ LoadP(kInterpreterBytecodeOffsetRegister,
__ LoadP(r5,
MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp));
__ LoadP(cp, MemOperand(fp, StandardFrameConstants::kContextOffset)); __ SmiUntag(kInterpreterBytecodeOffsetRegister);
{
FrameScope scope(masm, StackFrame::INTERNAL); // Advance to the next bytecode.
__ Push(kInterpreterAccumulatorRegister, r4, r5); AdvanceBytecodeOffset(masm, kInterpreterBytecodeArrayRegister,
__ CallRuntime(Runtime::kInterpreterAdvanceBytecodeOffset); kInterpreterBytecodeOffsetRegister, r4, r5);
__ Move(r5, r3); // Result is the new bytecode offset.
__ Pop(kInterpreterAccumulatorRegister); // Convert new bytecode offset to a Smi and save in the stackframe.
} __ SmiTag(r5, kInterpreterBytecodeOffsetRegister);
__ StoreP(r5, __ StoreP(r5,
MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp));
......
...@@ -1137,6 +1137,52 @@ static void MaybeTailCallOptimizedCodeSlot(MacroAssembler* masm, ...@@ -1137,6 +1137,52 @@ static void MaybeTailCallOptimizedCodeSlot(MacroAssembler* masm,
__ bind(&fallthrough); __ bind(&fallthrough);
} }
// Advances |bytecode_offset| past the bytecode it currently points at. This
// simulates what all bytecode handlers do upon completion of the underlying
// operation.
//
//   bytecode_array:  base register the bytecodes are loaded from (only read).
//   bytecode_offset: offset of the current bytecode; updated in place to the
//                    offset of the following bytecode.
//   scratch1:        clobbered; holds the address of the bytecode size table.
//   scratch2:        clobbered; holds the bytecode, then its byte offset into
//                    the size table, and finally its size.
//
// A Wide or ExtraWide prefix bytecode is stepped over first, and the size of
// the prefixed bytecode is then read from the matching scaled size table.
// The four registers must not alias each other.
static void AdvanceBytecodeOffset(MacroAssembler* masm, Register bytecode_array,
                                  Register bytecode_offset, Register scratch1,
                                  Register scratch2) {
  Register bytecode_size_table = scratch1;
  Register bytecode = scratch2;
  DCHECK(!AreAliased(bytecode_array, bytecode_offset, bytecode_size_table,
                     bytecode));

  __ mov(
      bytecode_size_table,
      Operand(ExternalReference::bytecode_size_table_address(masm->isolate())));

  // Load the current bytecode.
  __ LoadlB(bytecode, MemOperand(bytecode_array, bytecode_offset));

  // Check if the bytecode is a Wide or ExtraWide prefix bytecode. Both
  // prefixes sort before every other bytecode, so a single compare against 1
  // separates "not a prefix" (>1), ExtraWide (==1) and Wide (<1).
  Label load_size, extra_wide;
  STATIC_ASSERT(0 == static_cast<int>(interpreter::Bytecode::kWide));
  STATIC_ASSERT(1 == static_cast<int>(interpreter::Bytecode::kExtraWide));
  __ CmpP(bytecode, Operand(0x1));
  __ bgt(&load_size);
  __ beq(&extra_wide);

  // Wide prefix: load the next bytecode and update table to the wide scaled
  // table.
  __ AddP(bytecode_offset, bytecode_offset, Operand(1));
  __ LoadlB(bytecode, MemOperand(bytecode_array, bytecode_offset));
  __ AddP(bytecode_size_table, bytecode_size_table,
          Operand(kIntSize * interpreter::Bytecodes::kBytecodeCount));
  __ b(&load_size);

  __ bind(&extra_wide);
  // ExtraWide prefix: load the next bytecode and update table to the extra
  // wide scaled table.
  __ AddP(bytecode_offset, bytecode_offset, Operand(1));
  __ LoadlB(bytecode, MemOperand(bytecode_array, bytecode_offset));
  __ AddP(bytecode_size_table, bytecode_size_table,
          Operand(2 * kIntSize * interpreter::Bytecodes::kBytecodeCount));
  // Fall through to load_size; no branch is needed because the label is bound
  // on the next instruction.

  // Load the size of the current bytecode.
  __ bind(&load_size);
  // The tables are laid out kIntSize * kBytecodeCount bytes apart and read
  // with a word load, so the index is scaled by kIntSize (a shift by 2).
  STATIC_ASSERT(kIntSize == 4);
  __ ShiftLeftP(scratch2, bytecode, Operand(2));
  __ LoadlW(scratch2, MemOperand(bytecode_size_table, scratch2));
  __ AddP(bytecode_offset, bytecode_offset, scratch2);
}
// Generate code for entering a JS function with the interpreter. // Generate code for entering a JS function with the interpreter.
// On entry to the function the receiver and arguments have been pushed on the // On entry to the function the receiver and arguments have been pushed on the
// stack left to right. The actual argument count matches the formal parameter // stack left to right. The actual argument count matches the formal parameter
...@@ -1258,13 +1304,16 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ...@@ -1258,13 +1304,16 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
__ StoreP(r5, MemOperand(fp, r8)); __ StoreP(r5, MemOperand(fp, r8));
__ bind(&no_incoming_new_target_or_generator_register); __ bind(&no_incoming_new_target_or_generator_register);
// Load accumulator and dispatch table into registers. // Load accumulator with undefined.
__ LoadRoot(kInterpreterAccumulatorRegister, Heap::kUndefinedValueRootIndex); __ LoadRoot(kInterpreterAccumulatorRegister, Heap::kUndefinedValueRootIndex);
// Load the dispatch table into a register and dispatch to the bytecode
// handler at the current bytecode offset.
Label do_dispatch;
__ bind(&do_dispatch);
__ mov(kInterpreterDispatchTableRegister, __ mov(kInterpreterDispatchTableRegister,
Operand(ExternalReference::interpreter_dispatch_table_address( Operand(ExternalReference::interpreter_dispatch_table_address(
masm->isolate()))); masm->isolate())));
// Dispatch to the first bytecode handler for the function.
__ LoadlB(r3, MemOperand(kInterpreterBytecodeArrayRegister, __ LoadlB(r3, MemOperand(kInterpreterBytecodeArrayRegister,
kInterpreterBytecodeOffsetRegister)); kInterpreterBytecodeOffsetRegister));
__ ShiftLeftP(ip, r3, Operand(kPointerSizeLog2)); __ ShiftLeftP(ip, r3, Operand(kPointerSizeLog2));
...@@ -1273,9 +1322,17 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ...@@ -1273,9 +1322,17 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
masm->isolate()->heap()->SetInterpreterEntryReturnPCOffset(masm->pc_offset()); masm->isolate()->heap()->SetInterpreterEntryReturnPCOffset(masm->pc_offset());
// The return value is in r2. // Get bytecode array and bytecode offset from the stack frame.
LeaveInterpreterFrame(masm, r4); __ LoadP(kInterpreterBytecodeArrayRegister,
__ Ret(); MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp));
__ LoadP(kInterpreterBytecodeOffsetRegister,
MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp));
__ SmiUntag(kInterpreterBytecodeOffsetRegister);
// Advance to the next bytecode and dispatch.
AdvanceBytecodeOffset(masm, kInterpreterBytecodeArrayRegister,
kInterpreterBytecodeOffsetRegister, r3, r4);
__ b(&do_dispatch);
// Load debug copy of the bytecode array if it exists. // Load debug copy of the bytecode array if it exists.
// kInterpreterBytecodeArrayRegister is already loaded with // kInterpreterBytecodeArrayRegister is already loaded with
...@@ -1292,6 +1349,12 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { ...@@ -1292,6 +1349,12 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
__ b(&bytecode_array_loaded); __ b(&bytecode_array_loaded);
} }
// Generates the InterpreterExitTrampoline builtin. The return bytecode handler
// tail-calls this builtin instead of returning to the
// InterpreterEntryTrampoline; it tears down the interpreter frame and returns.
void Builtins::Generate_InterpreterExitTrampoline(MacroAssembler* masm) {
// The return value is in r2.
// NOTE(review): r4 is presumably a scratch register for
// LeaveInterpreterFrame - confirm against its definition.
LeaveInterpreterFrame(masm, r4);
// Return to the caller.
__ Ret();
}
static void Generate_StackOverflowCheck(MacroAssembler* masm, Register num_args, static void Generate_StackOverflowCheck(MacroAssembler* masm, Register num_args,
Register scratch, Register scratch,
Label* stack_overflow) { Label* stack_overflow) {
...@@ -1473,20 +1536,19 @@ static void Generate_InterpreterEnterBytecode(MacroAssembler* masm) { ...@@ -1473,20 +1536,19 @@ static void Generate_InterpreterEnterBytecode(MacroAssembler* masm) {
} }
void Builtins::Generate_InterpreterEnterBytecodeAdvance(MacroAssembler* masm) { void Builtins::Generate_InterpreterEnterBytecodeAdvance(MacroAssembler* masm) {
// Advance the current bytecode offset stored within the given interpreter // Get bytecode array and bytecode offset from the stack frame.
// stack frame. This simulates what all bytecode handlers do upon completion __ LoadP(kInterpreterBytecodeArrayRegister,
// of the underlying operation. MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp));
__ LoadP(r3, MemOperand(fp, InterpreterFrameConstants::kBytecodeArrayFromFp)); __ LoadP(kInterpreterBytecodeOffsetRegister,
__ LoadP(r4,
MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp));
__ LoadP(cp, MemOperand(fp, StandardFrameConstants::kContextOffset)); __ SmiUntag(kInterpreterBytecodeOffsetRegister);
{
FrameScope scope(masm, StackFrame::INTERNAL); // Advance to the next bytecode.
__ Push(kInterpreterAccumulatorRegister, r3, r4); AdvanceBytecodeOffset(masm, kInterpreterBytecodeArrayRegister,
__ CallRuntime(Runtime::kInterpreterAdvanceBytecodeOffset); kInterpreterBytecodeOffsetRegister, r3, r4);
__ Move(r4, r2); // Result is the new bytecode offset.
__ Pop(kInterpreterAccumulatorRegister); // Convert new bytecode offset to a Smi and save in the stackframe.
} __ SmiTag(r4, kInterpreterBytecodeOffsetRegister);
__ StoreP(r4, __ StoreP(r4,
MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp)); MemOperand(fp, InterpreterFrameConstants::kBytecodeOffsetFromFp));
......
...@@ -303,6 +303,12 @@ void ApiCallbackDescriptor::InitializePlatformSpecific( ...@@ -303,6 +303,12 @@ void ApiCallbackDescriptor::InitializePlatformSpecific(
data->InitializePlatformSpecific(arraysize(registers), registers); data->InitializePlatformSpecific(arraysize(registers), registers);
} }
// Registers the platform-specific register list for the
// InterpreterExitTrampoline descriptor: a single entry,
// kInterpreterAccumulatorRegister.
void InterpreterExitTrampolineDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) {
Register registers[] = {kInterpreterAccumulatorRegister};
// The count is derived from the array so it cannot drift from the list.
data->InitializePlatformSpecific(arraysize(registers), registers);
}
void InterpreterDispatchDescriptor::InitializePlatformSpecific( void InterpreterDispatchDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) { CallInterfaceDescriptorData* data) {
Register registers[] = { Register registers[] = {
......
...@@ -293,6 +293,12 @@ void ApiCallbackDescriptor::InitializePlatformSpecific( ...@@ -293,6 +293,12 @@ void ApiCallbackDescriptor::InitializePlatformSpecific(
data->InitializePlatformSpecific(arraysize(registers), registers); data->InitializePlatformSpecific(arraysize(registers), registers);
} }
// Registers the platform-specific register list for the
// InterpreterExitTrampoline descriptor: a single entry,
// kInterpreterAccumulatorRegister.
void InterpreterExitTrampolineDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) {
Register registers[] = {kInterpreterAccumulatorRegister};
// The count is derived from the array so it cannot drift from the list.
data->InitializePlatformSpecific(arraysize(registers), registers);
}
void InterpreterDispatchDescriptor::InitializePlatformSpecific( void InterpreterDispatchDescriptor::InitializePlatformSpecific(
CallInterfaceDescriptorData* data) { CallInterfaceDescriptorData* data) {
Register registers[] = { Register registers[] = {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment