Commit cc5808cd authored by Georgia Kouveli, committed by Commit Bot

[arm64] Port the skipping of arguments adaptor frames.

This is a port of the improvements to the ArgumentsAdaptorTrampoline
that previously landed for x64. It skips the arguments adaptor frame
creation if the callee cannot observe the actual arguments (as indicated
by the "is_safe_to_skip_arguments_adaptor" bit on the SharedFunctionInfo),
and instead just massages the current stack frame appropriately (either
by pushing additional undefined values in the case of under-application,
or by removing the superfluous arguments in the case of over-application).
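
To make the stack-frame massaging concrete, here is a minimal C++
sketch of the in-place adjustment. It is not part of this patch: the
helper name is invented, and a std::vector stands in for the caller's
argument slots, whereas the real builtin does this on the machine
stack with Push/Drop.

  #include <cstddef>
  #include <iostream>
  #include <string>
  #include <vector>

  // Model of adjusting the caller's argument slots (excluding the
  // receiver) to match the expected parameter count.
  void AdjustArgumentsInPlace(std::vector<std::string>& args,
                              std::size_t expected) {
    if (args.size() < expected) {
      // Under-application: fill the missing slots with undefined.
      args.resize(expected, "undefined");
    } else {
      // Over-application: drop the superfluous trailing arguments.
      args.resize(expected);
    }
  }

  int main() {
    std::vector<std::string> args = {"1", "2", "3", "4"};
    AdjustArgumentsInPlace(args, 2);  // 4 actual vs. 2 expected
    for (const std::string& a : args) std::cout << a << ' ';
    std::cout << '\n';  // prints: 1 2
    return 0;
  }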

Due to the 16-byte stack alignment requirement on arm64, we only skip
the arguments adaptor frame creation when the difference between the
expected and the actual argument counts is even. When it is odd, we
would still need to copy the actual arguments within the existing frame
to account for the padding, which would defeat the point of the
improvement.
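
For illustration, the overall eligibility check can be thought of as
the following predicate. This is a sketch only: the function name and
signature are invented, and the real checks are emitted directly as
arm64 instructions in Generate_ArgumentsAdaptorTrampoline.

  // Illustrative only: in the builtin these checks are a Sub/Tbnz on
  // the argument-count difference and a TestAndBranchIfAnySet on
  // SharedFunctionInfo::IsSafeToSkipArgumentsAdaptorBit.
  bool CanAdaptArgumentsInPlace(int argc_actual, int argc_expected,
                                bool is_safe_to_skip_arguments_adaptor) {
    // The callee must not be able to observe the actual arguments
    // (e.g. it must not use the arguments object).
    if (!is_safe_to_skip_arguments_adaptor) return false;
    // arm64 keeps sp 16-byte aligned, so argument slots come in pairs;
    // an odd difference would force copying the existing arguments to
    // fix up the padding, which would defeat the optimization.
    if ((argc_actual - argc_expected) % 2 != 0) return false;
    return true;
  }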

Bug: v8:8895
Tbr: jgruber@chromium.org
Doc: http://bit.ly/v8-faster-calls-with-arguments-mismatch
Change-Id: I7f13f6f0ba86edb483e088aac145cfcf9c937fef
Reviewed-on: https://chromium-review.googlesource.com/c/1491633
Commit-Queue: Benedikt Meurer <bmeurer@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59983}
parent 5bdb6b83
@@ -2829,128 +2829,198 @@ void Builtins::Generate_ArgumentsAdaptorTrampoline(MacroAssembler* masm) {
Register argc_actual = x0; // Excluding the receiver.
Register argc_expected = x2; // Excluding the receiver.
Register function = x1;
Register argc_actual_minus_expected = x5;
Label dont_adapt_arguments, stack_overflow;
Label create_adaptor_frame, dont_adapt_arguments, stack_overflow,
adapt_arguments_in_place;
Label enough_arguments;
__ Cmp(argc_expected, SharedFunctionInfo::kDontAdaptArgumentsSentinel);
__ B(eq, &dont_adapt_arguments);
EnterArgumentsAdaptorFrame(masm);
Register copy_from = x10;
Register copy_end = x11;
Register copy_to = x12;
Register argc_to_copy = x13;
Register argc_unused_actual = x14;
Register scratch1 = x15, scratch2 = x16;
// We need slots for the expected arguments, with one extra slot for the
// receiver.
__ RecordComment("-- Stack check --");
__ Add(scratch1, argc_expected, 1);
Generate_StackOverflowCheck(masm, scratch1, &stack_overflow);
// Round up number of slots to be even, to maintain stack alignment.
__ RecordComment("-- Allocate callee frame slots --");
__ Add(scratch1, scratch1, 1);
__ Bic(scratch1, scratch1, 1);
__ Claim(scratch1, kSystemPointerSize);
__ Mov(copy_to, sp);
// Preparing the expected arguments is done in four steps, the order of
// which is chosen so we can use LDP/STP and avoid conditional branches as
// much as possible.
// (1) If we don't have enough arguments, fill the remaining expected
// arguments with undefined, otherwise skip this step.
__ Subs(scratch1, argc_actual, argc_expected);
__ Csel(argc_unused_actual, xzr, scratch1, lt);
__ Csel(argc_to_copy, argc_expected, argc_actual, ge);
__ B(ge, &enough_arguments);
// Fill the remaining expected arguments with undefined.
__ RecordComment("-- Fill slots with undefined --");
__ Sub(copy_end, copy_to, Operand(scratch1, LSL, kSystemPointerSizeLog2));
__ LoadRoot(scratch1, RootIndex::kUndefinedValue);
Label fill;
__ Bind(&fill);
__ Stp(scratch1, scratch1,
MemOperand(copy_to, 2 * kSystemPointerSize, PostIndex));
// We might write one slot extra, but that is ok because we'll overwrite it
// below.
__ Cmp(copy_end, copy_to);
__ B(hi, &fill);
// Correct copy_to, for the case where we wrote one additional slot.
__ Mov(copy_to, copy_end);
__ Bind(&enough_arguments);
// (2) Copy all of the actual arguments, or as many as we need.
Label skip_copy;
__ RecordComment("-- Copy actual arguments --");
__ Cbz(argc_to_copy, &skip_copy);
__ Add(copy_end, copy_to, Operand(argc_to_copy, LSL, kSystemPointerSizeLog2));
__ Add(copy_from, fp, 2 * kSystemPointerSize);
// Adjust for difference between actual and expected arguments.
__ Add(copy_from, copy_from,
Operand(argc_unused_actual, LSL, kSystemPointerSizeLog2));
// Copy arguments. We use load/store pair instructions, so we might overshoot
// by one slot, but since we copy the arguments starting from the last one, if
// we do overshoot, the extra slot will be overwritten later by the receiver.
Label copy_2_by_2;
__ Bind(&copy_2_by_2);
__ Ldp(scratch1, scratch2,
MemOperand(copy_from, 2 * kSystemPointerSize, PostIndex));
__ Stp(scratch1, scratch2,
MemOperand(copy_to, 2 * kSystemPointerSize, PostIndex));
__ Cmp(copy_end, copy_to);
__ B(hi, &copy_2_by_2);
__ Bind(&skip_copy);
// (3) Store padding, which might be overwritten by the receiver, if it is not
// necessary.
__ RecordComment("-- Store padding --");
__ Str(padreg, MemOperand(fp, -5 * kSystemPointerSize));
// (4) Store receiver. Calculate target address from the sp to avoid checking
// for padding. Storing the receiver will overwrite either the extra slot
// we copied with the actual arguments, if we did copy one, or the padding we
// stored above.
__ RecordComment("-- Store receiver --");
__ Add(copy_from, fp, 2 * kSystemPointerSize);
__ Ldr(scratch1,
MemOperand(copy_from, argc_actual, LSL, kSystemPointerSizeLog2));
__ Str(scratch1, MemOperand(sp, argc_expected, LSL, kSystemPointerSizeLog2));
// Arguments have been adapted. Now call the entry point.
__ RecordComment("-- Call entry point --");
__ Mov(argc_actual, argc_expected);
// x0 : expected number of arguments
// x1 : function (passed through to callee)
// x3 : new target (passed through to callee)
static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
__ LoadTaggedPointerField(x2,
FieldMemOperand(function, JSFunction::kCodeOffset));
__ CallCodeObject(x2);
// When the difference between argc_actual and argc_expected is odd, we
// create an arguments adaptor frame.
__ Sub(argc_actual_minus_expected, argc_actual, argc_expected);
__ Tbnz(argc_actual_minus_expected, 0, &create_adaptor_frame);
// Store offset of return address for deoptimizer.
masm->isolate()->heap()->SetArgumentsAdaptorDeoptPCOffset(masm->pc_offset());
// When the difference is even, check if we are allowed to adjust the
// existing frame instead.
__ LoadTaggedPointerField(
x4, FieldMemOperand(x1, JSFunction::kSharedFunctionInfoOffset));
__ Ldr(w4, FieldMemOperand(x4, SharedFunctionInfo::kFlagsOffset));
__ TestAndBranchIfAnySet(
w4, SharedFunctionInfo::IsSafeToSkipArgumentsAdaptorBit::kMask,
&adapt_arguments_in_place);
// -------------------------------------------
// Create an arguments adaptor frame.
// -------------------------------------------
__ Bind(&create_adaptor_frame);
{
__ RecordComment("-- Adapt arguments --");
EnterArgumentsAdaptorFrame(masm);
// Exit frame and return.
LeaveArgumentsAdaptorFrame(masm);
__ Ret();
Register copy_from = x10;
Register copy_end = x11;
Register copy_to = x12;
Register argc_to_copy = x13;
Register argc_unused_actual = x14;
Register scratch1 = x15, scratch2 = x16;
// We need slots for the expected arguments, with one extra slot for the
// receiver.
__ RecordComment("-- Stack check --");
__ Add(scratch1, argc_expected, 1);
Generate_StackOverflowCheck(masm, scratch1, &stack_overflow);
// Round up number of slots to be even, to maintain stack alignment.
__ RecordComment("-- Allocate callee frame slots --");
__ Add(scratch1, scratch1, 1);
__ Bic(scratch1, scratch1, 1);
__ Claim(scratch1, kSystemPointerSize);
__ Mov(copy_to, sp);
// Preparing the expected arguments is done in four steps, the order of
// which is chosen so we can use LDP/STP and avoid conditional branches as
// much as possible.
// (1) If we don't have enough arguments, fill the remaining expected
// arguments with undefined, otherwise skip this step.
Label enough_arguments;
__ Subs(scratch1, argc_actual, argc_expected);
__ Csel(argc_unused_actual, xzr, scratch1, lt);
__ Csel(argc_to_copy, argc_expected, argc_actual, ge);
__ B(ge, &enough_arguments);
// Fill the remaining expected arguments with undefined.
__ RecordComment("-- Fill slots with undefined --");
__ Sub(copy_end, copy_to, Operand(scratch1, LSL, kSystemPointerSizeLog2));
__ LoadRoot(scratch1, RootIndex::kUndefinedValue);
Label fill;
__ Bind(&fill);
__ Stp(scratch1, scratch1,
MemOperand(copy_to, 2 * kSystemPointerSize, PostIndex));
// We might write one slot extra, but that is ok because we'll overwrite it
// below.
__ Cmp(copy_end, copy_to);
__ B(hi, &fill);
// Correct copy_to, for the case where we wrote one additional slot.
__ Mov(copy_to, copy_end);
__ Bind(&enough_arguments);
// (2) Copy all of the actual arguments, or as many as we need.
Label skip_copy;
__ RecordComment("-- Copy actual arguments --");
__ Cbz(argc_to_copy, &skip_copy);
__ Add(copy_end, copy_to,
Operand(argc_to_copy, LSL, kSystemPointerSizeLog2));
__ Add(copy_from, fp, 2 * kSystemPointerSize);
// Adjust for difference between actual and expected arguments.
__ Add(copy_from, copy_from,
Operand(argc_unused_actual, LSL, kSystemPointerSizeLog2));
// Copy arguments. We use load/store pair instructions, so we might
// overshoot by one slot, but since we copy the arguments starting from the
// last one, if we do overshoot, the extra slot will be overwritten later by
// the receiver.
Label copy_2_by_2;
__ Bind(&copy_2_by_2);
__ Ldp(scratch1, scratch2,
MemOperand(copy_from, 2 * kSystemPointerSize, PostIndex));
__ Stp(scratch1, scratch2,
MemOperand(copy_to, 2 * kSystemPointerSize, PostIndex));
__ Cmp(copy_end, copy_to);
__ B(hi, &copy_2_by_2);
__ Bind(&skip_copy);
// (3) Store padding, which might be overwritten by the receiver, if it is
// not necessary.
__ RecordComment("-- Store padding --");
__ Str(padreg, MemOperand(fp, -5 * kSystemPointerSize));
// (4) Store receiver. Calculate target address from the sp to avoid
// checking for padding. Storing the receiver will overwrite either the
// extra slot we copied with the actual arguments, if we did copy one, or
// the padding we stored above.
__ RecordComment("-- Store receiver --");
__ Add(copy_from, fp, 2 * kSystemPointerSize);
__ Ldr(scratch1,
MemOperand(copy_from, argc_actual, LSL, kSystemPointerSizeLog2));
__ Str(scratch1,
MemOperand(sp, argc_expected, LSL, kSystemPointerSizeLog2));
// Arguments have been adapted. Now call the entry point.
__ RecordComment("-- Call entry point --");
__ Mov(argc_actual, argc_expected);
// x0 : expected number of arguments
// x1 : function (passed through to callee)
// x3 : new target (passed through to callee)
static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
__ LoadTaggedPointerField(
x2, FieldMemOperand(function, JSFunction::kCodeOffset));
__ CallCodeObject(x2);
// Store offset of return address for deoptimizer.
masm->isolate()->heap()->SetArgumentsAdaptorDeoptPCOffset(
masm->pc_offset());
// Exit frame and return.
LeaveArgumentsAdaptorFrame(masm);
__ Ret();
}
// Call the entry point without adapting the arguments.
__ RecordComment("-- Call without adapting args --");
// -----------------------------------------
// Adapt arguments in the existing frame.
// -----------------------------------------
__ Bind(&adapt_arguments_in_place);
{
__ RecordComment("-- Update arguments in place --");
// The callee cannot observe the actual arguments, so it's safe to just
// pass the expected arguments by massaging the stack appropriately. See
// http://bit.ly/v8-faster-calls-with-arguments-mismatch for details.
Label under_application, over_application;
__ Tbnz(argc_actual_minus_expected, kXSignBit, &under_application);
__ Bind(&over_application);
{
// Remove superfluous arguments from the stack. The number of superfluous
// arguments is even.
__ RecordComment("-- Over-application --");
__ Mov(argc_actual, argc_expected);
__ Drop(argc_actual_minus_expected);
__ B(&dont_adapt_arguments);
}
__ Bind(&under_application);
{
// Fill remaining expected arguments with undefined values.
__ RecordComment("-- Under-application --");
Label fill;
Register undef_value = x16;
__ LoadRoot(undef_value, RootIndex::kUndefinedValue);
__ Bind(&fill);
__ Add(argc_actual, argc_actual, 2);
__ Push(undef_value, undef_value);
__ Cmp(argc_actual, argc_expected);
__ B(lt, &fill);
__ B(&dont_adapt_arguments);
}
}
// -------------------------------------------
// Don't adapt arguments.
// -------------------------------------------
__ Bind(&dont_adapt_arguments);
static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
__ LoadTaggedPointerField(x2,
FieldMemOperand(function, JSFunction::kCodeOffset));
__ JumpCodeObject(x2);
{
// Call the entry point without adapting the arguments.
__ RecordComment("-- Call without adapting args --");
static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
__ LoadTaggedPointerField(
x2, FieldMemOperand(function, JSFunction::kCodeOffset));
__ JumpCodeObject(x2);
}
__ Bind(&stack_overflow);
__ RecordComment("-- Stack overflow --");
......