Commit a016c9fe authored by Seth Brenith, committed by Commit Bot

[arm][arm64] Use normal fp semantics in JSEntry

On arm64, Windows Performance Recorder gets confused by the fact that fp
in Builtins_JSEntry doesn't point to the saved {fp, lr} pair for the
caller frame. The expected usage of fp is documented in [1]:

  The frame pointer (x29) is required for compatibility with fast stack
  walking used by ETW and other services. It must point to the previous
  {x29, x30} pair on the stack.

In slightly more detail, the Windows function RtlWalkFrameChain is
responsible for generating stack traces during profiling with Windows
Performance Recorder, and that function relies on the rule quoted above.
Notably, it does not make any effort to read the unwinding data that one
could obtain with RtlLookupFunctionEntry. Stack walks using that data,
such as those performed by WinDbg and the cctest StackUnwindingWin64,
work fine.

It would be convenient if we could use fp in a more standard way during
JSEntry so that Windows profiling tools work correctly. (We can also
reduce JSEntry by two instructions in doing so.)

Both arm and arm64 currently put a -1 value on the stack at the location
that fp points to. This could prevent accidental access during the
epilog of JSEntry, where fp might be zero. However, we believe that this
protection is no longer necessary, and any bug that causes a read from
fp during the end of JSEntry would cause various CQ failures.

[1] https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-160

Change-Id: Iece5666129b9188fc4c12007809b50f046f4044f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2607636
Commit-Queue: Seth Brenith <seth.brenith@microsoft.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Ross McIlroy <rmcilroy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72165}
parent 81b3372e
......@@ -464,10 +464,10 @@ namespace {
// Total size of the stack space pushed by JSEntryVariant.
// JSEntryTrampoline uses this to access on stack arguments passed to
// JSEntryVariant.
constexpr int kPushedStackSpace = kNumCalleeSaved * kPointerSize +
kPointerSize /* LR */ +
constexpr int kPushedStackSpace = kNumCalleeSaved * kPointerSize -
kPointerSize /* FP */ +
kNumDoubleCalleeSaved * kDoubleSize +
4 * kPointerSize /* r5, r6, r7, scratch */ +
5 * kPointerSize /* r5, r6, r7, fp, lr */ +
EntryFrameConstants::kCallerFPOffset;
// Assert that the EntryFrameConstants are in sync with the builtin.
......@@ -500,6 +500,7 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
// r1: microtask_queue
// Preserve all but r0 and pass them to entry_trampoline.
Label invoke, handler_entry, exit;
const RegList kCalleeSavedWithoutFp = kCalleeSaved & ~fp.bit();
// Update |pushed_stack_space| when we manipulate the stack.
int pushed_stack_space = EntryFrameConstants::kCallerFPOffset;
......@@ -508,10 +509,10 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
// Called from C, so do not pop argc and args on exit (preserve sp)
// No need to save register-passed args
// Save callee-saved registers (incl. cp and fp), sp, and lr
__ stm(db_w, sp, kCalleeSaved | lr.bit());
// Save callee-saved registers (incl. cp), but without fp
__ stm(db_w, sp, kCalleeSavedWithoutFp);
pushed_stack_space +=
kNumCalleeSaved * kPointerSize + kPointerSize /* LR */;
kNumCalleeSaved * kPointerSize - kPointerSize /* FP */;
// Save callee-saved vfp registers.
__ vstm(db_w, sp, kFirstCalleeSavedDoubleReg, kLastCalleeSavedDoubleReg);
......@@ -532,15 +533,9 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
__ Move(r5, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
masm->isolate()));
__ ldr(r5, MemOperand(r5));
{
UseScratchRegisterScope temps(masm);
Register scratch = temps.Acquire();
// Push a bad frame pointer to fail if it is used.
__ mov(scratch, Operand(-1));
__ stm(db_w, sp, r5.bit() | r6.bit() | r7.bit() | scratch.bit());
pushed_stack_space += 4 * kPointerSize /* r5, r6, r7, scratch */;
}
__ stm(db_w, sp, r5.bit() | r6.bit() | r7.bit() | fp.bit() | lr.bit());
pushed_stack_space += 5 * kPointerSize /* r5, r6, r7, fp, lr */;
Register scratch = r6;
......@@ -628,19 +623,21 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
__ str(r3, MemOperand(scratch));
// Reset the stack to the callee saved registers.
__ add(sp, sp, Operand(-EntryFrameConstants::kCallerFPOffset));
__ add(sp, sp,
Operand(-EntryFrameConstants::kCallerFPOffset -
kSystemPointerSize /* already popped one */));
// Restore callee-saved registers and return.
#ifdef DEBUG
if (FLAG_debug_code) {
__ mov(lr, Operand(pc));
}
#endif
__ ldm(ia_w, sp, fp.bit() | lr.bit());
// Restore callee-saved vfp registers.
__ vldm(ia_w, sp, kFirstCalleeSavedDoubleReg, kLastCalleeSavedDoubleReg);
__ ldm(ia_w, sp, kCalleeSaved | pc.bit());
__ ldm(ia_w, sp, kCalleeSavedWithoutFp);
__ mov(pc, lr);
// Emit constant pool.
__ CheckConstPool(true, false);
}
} // namespace
......
......@@ -603,12 +603,11 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
NoRootArrayScope no_root_array(masm);
#if defined(V8_OS_WIN)
// Windows ARM64 relies on a frame pointer (fp/x29 which are aliases to each
// other) chain to do stack unwinding, but JSEntry breaks that by setting fp
// to point to bad_frame_pointer below. To fix unwind information for this
// case, JSEntry registers the offset (from current fp to the caller's fp
// saved by PushCalleeSavedRegisters on stack) to xdata_encoder which then
// emits the offset value as part of result unwind data accordingly.
// In order to allow Windows debugging tools to reconstruct a call stack, we
// must generate information describing how to recover at least fp, sp, and
// pc for the calling frame. Here, JSEntry registers offsets to
// xdata_encoder which then emits the offset values as part of the unwind
// data accordingly.
win64_unwindinfo::XdataEncoder* xdata_encoder = masm->GetXdataEncoder();
if (xdata_encoder) {
xdata_encoder->onFramePointerAdjustment(
......@@ -627,49 +626,52 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
__ Mov(kRootRegister, x0);
}
// Set up fp. It points to the {fp, lr} pair pushed as the last step in
// PushCalleeSavedRegisters.
STATIC_ASSERT(
EntryFrameConstants::kCalleeSavedRegisterBytesPushedAfterFpLrPair == 0);
STATIC_ASSERT(EntryFrameConstants::kOffsetToCalleeSavedRegisters == 0);
__ Mov(fp, sp);
// Build an entry frame (see layout below).
int64_t bad_frame_pointer = -1L; // Bad frame pointer to fail if it is used.
__ Mov(x13, bad_frame_pointer);
// Push frame type markers.
__ Mov(x12, StackFrame::TypeToMarker(type));
__ Push(x12, xzr);
__ Mov(x11, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
masm->isolate()));
__ Ldr(x10, MemOperand(x11));
__ Ldr(x10, MemOperand(x11)); // x10 = C entry FP.
// x13 (the bad frame pointer) is the first item pushed.
STATIC_ASSERT(EntryFrameConstants::kOffsetToCalleeSavedRegisters ==
1 * kSystemPointerSize);
__ Push(x13, x12, xzr, x10);
// Set up fp.
__ Sub(fp, sp, EntryFrameConstants::kCallerFPOffset);
// Push the JS entry frame marker. Also set js_entry_sp if this is the
// outermost JS call.
// Set js_entry_sp if this is the outermost JS call.
Label done;
ExternalReference js_entry_sp = ExternalReference::Create(
IsolateAddressId::kJSEntrySPAddress, masm->isolate());
__ Mov(x10, js_entry_sp);
__ Ldr(x11, MemOperand(x10));
__ Mov(x12, js_entry_sp);
__ Ldr(x11, MemOperand(x12)); // x11 = previous JS entry SP.
// Select between the inner and outermost frame marker, based on the JS entry
// sp. We assert that the inner marker is zero, so we can use xzr to save a
// move instruction.
DCHECK_EQ(StackFrame::INNER_JSENTRY_FRAME, 0);
__ Cmp(x11, 0); // If x11 is zero, this is the outermost frame.
__ Csel(x12, xzr, StackFrame::OUTERMOST_JSENTRY_FRAME, ne);
// x11 = JS entry frame marker.
__ Csel(x11, xzr, StackFrame::OUTERMOST_JSENTRY_FRAME, ne);
__ B(ne, &done);
__ Str(fp, MemOperand(x10));
__ Str(fp, MemOperand(x12));
__ Bind(&done);
__ Push(x12, padreg);
__ Push(x10, x11);
// The frame set up looks like this:
// sp[0] : padding.
// sp[1] : JS entry frame marker.
// sp[2] : C entry FP.
// sp[3] : stack frame marker.
// sp[4] : stack frame marker.
// sp[5] : bad frame pointer 0xFFF...FF <- fp points here.
// sp[0] : JS entry frame marker.
// sp[1] : C entry FP.
// sp[2] : stack frame marker (0).
// sp[3] : stack frame marker (type).
// sp[4] : saved fp <- fp points here.
// sp[5] : saved lr
// sp[6,24) : other saved registers
// Jump to a faked try block that does the invoke, with a faked catch
// block that sets the pending exception.
......@@ -690,7 +692,7 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
// Caught exception: Store result (exception) in the pending exception
// field in the JSEnv and return a failure sentinel. Coming in here the
// fp will be invalid because the PushTryHandler below sets it to 0 to
// fp will be invalid because UnwindAndFindHandler sets it to 0 to
// signal the existence of the JSEntry frame.
__ Mov(x10,
ExternalReference::Create(IsolateAddressId::kPendingExceptionAddress,
......@@ -747,18 +749,19 @@ void Generate_JSEntryVariant(MacroAssembler* masm, StackFrame::Type type,
// x0 holds the result.
// The stack pointer points to the top of the entry frame pushed on entry from
// C++ (at the beginning of this stub):
// sp[0] : padding.
// sp[1] : JS entry frame marker.
// sp[2] : C entry FP.
// sp[3] : stack frame marker.
// sp[4] : stack frame marker.
// sp[5] : bad frame pointer 0xFFF...FF <- fp points here.
// sp[0] : JS entry frame marker.
// sp[1] : C entry FP.
// sp[2] : stack frame marker (0).
// sp[3] : stack frame marker (type).
// sp[4] : saved fp <- fp might point here, or might be zero.
// sp[5] : saved lr
// sp[6,24) : other saved registers
// Check if the current stack frame is marked as the outermost JS frame.
Label non_outermost_js_2;
{
Register c_entry_fp = x11;
__ PeekPair(x10, c_entry_fp, 1 * kSystemPointerSize);
__ PeekPair(x10, c_entry_fp, 0);
__ Cmp(x10, StackFrame::OUTERMOST_JSENTRY_FRAME);
__ B(ne, &non_outermost_js_2);
__ Mov(x12, js_entry_sp);
......
......@@ -1214,20 +1214,20 @@ void MacroAssembler::PushCalleeSavedRegisters() {
stp(d10, d11, tos);
stp(d8, d9, tos);
STATIC_ASSERT(
EntryFrameConstants::kCalleeSavedRegisterBytesPushedBeforeFpLrPair ==
8 * kSystemPointerSize);
stp(x29, x30, tos); // fp, lr
STATIC_ASSERT(
EntryFrameConstants::kCalleeSavedRegisterBytesPushedAfterFpLrPair ==
10 * kSystemPointerSize);
stp(x27, x28, tos);
stp(x25, x26, tos);
stp(x23, x24, tos);
stp(x21, x22, tos);
stp(x19, x20, tos);
STATIC_ASSERT(
EntryFrameConstants::kCalleeSavedRegisterBytesPushedBeforeFpLrPair ==
18 * kSystemPointerSize);
stp(x29, x30, tos); // fp, lr
STATIC_ASSERT(
EntryFrameConstants::kCalleeSavedRegisterBytesPushedAfterFpLrPair == 0);
}
}
......@@ -1238,12 +1238,13 @@ void MacroAssembler::PopCalleeSavedRegisters() {
MemOperand tos(sp, 2 * kXRegSize, PostIndex);
ldp(x29, x30, tos); // fp, lr
ldp(x19, x20, tos);
ldp(x21, x22, tos);
ldp(x23, x24, tos);
ldp(x25, x26, tos);
ldp(x27, x28, tos);
ldp(x29, x30, tos);
ldp(d8, d9, tos);
ldp(d10, d11, tos);
......
......@@ -12,7 +12,7 @@ void GetCalleeSavedRegistersFromEntryFrame(void* fp,
RegisterState* register_state) {
const i::Address base_addr =
reinterpret_cast<i::Address>(fp) +
i::EntryFrameConstants::kDirectCallerRRegistersOffset;
i::EntryFrameConstants::kDirectCallerGeneralRegistersOffset;
if (!register_state->callee_saved) {
register_state->callee_saved = std::make_unique<CalleeSavedRegisters>();
......
......@@ -16,20 +16,17 @@ namespace internal {
// The layout of an EntryFrame is as follows:
// TOP OF THE STACK LOWEST ADDRESS
// +---------------------+-----------------------
// 0 | bad frame pointer | <-- frame ptr
// | (0xFFF.. FF) |
// 0 | saved fp (r11) | <-- frame ptr
// |- - - - - - - - - - -|
// 1..2 | saved register d8 |
// ... | ... |
// 15..16 | saved register d15 |
// 1 | saved lr (r14) |
// |- - - - - - - - - - -|
// 17 | saved register r4 |
// 2..3 | saved register d8 |
// ... | ... |
// 23 | saved register r10 |
// |- - - - - - - - - - -|
// 24 | saved fp (r11) |
// 16..17 | saved register d15 |
// |- - - - - - - - - - -|
// 25 | saved lr (r14) |
// 18 | saved register r4 |
// ... | ... |
// 24 | saved register r10 |
// -----+---------------------+-----------------------
// BOTTOM OF THE STACK HIGHEST ADDRESS
class EntryFrameConstants : public AllStatic {
......@@ -43,19 +40,19 @@ class EntryFrameConstants : public AllStatic {
static constexpr int kArgvOffset = +1 * kSystemPointerSize;
// These offsets refer to the immediate caller (i.e a native frame).
static constexpr int kDirectCallerRRegistersOffset =
/* bad frame pointer (-1) */
kPointerSize +
/* d8...d15 */
kNumDoubleCalleeSaved * kDoubleSize;
static constexpr int kDirectCallerFPOffset =
kDirectCallerRRegistersOffset +
/* r4...r10 (i.e. callee saved without fp) */
(kNumCalleeSaved - 1) * kPointerSize;
static constexpr int kDirectCallerFPOffset = 0;
static constexpr int kDirectCallerPCOffset =
kDirectCallerFPOffset + 1 * kSystemPointerSize;
static constexpr int kDirectCallerGeneralRegistersOffset =
kDirectCallerPCOffset +
/* saved caller PC */
kSystemPointerSize +
/* d8...d15 */
kNumDoubleCalleeSaved * kDoubleSize;
static constexpr int kDirectCallerSPOffset =
kDirectCallerPCOffset + 1 * kSystemPointerSize;
kDirectCallerGeneralRegistersOffset +
/* r4...r10 (i.e. callee saved without fp) */
(kNumCalleeSaved - 1) * kSystemPointerSize;
};
class WasmCompileLazyFrameConstants : public TypedFrameConstants {
......
......@@ -18,20 +18,17 @@ namespace internal {
// BOTTOM OF THE STACK HIGHEST ADDRESS
// slot Entry frame
// +---------------------+-----------------------
// -20 | saved register d15 |
// -19 | saved register d15 |
// ... | ... |
// -13 | saved register d8 |
// -12 | saved register d8 |
// |- - - - - - - - - - -|
// -12 | saved lr (x30) |
// |- - - - - - - - - - -|
// -11 | saved fp (x29) |
// |- - - - - - - - - - -|
// -10 | saved register x28 |
// -11 | saved register x28 |
// ... | ... |
// -1 | saved register x19 |
// -2 | saved register x19 |
// |- - - - - - - - - - -|
// 0 | bad frame pointer | <-- frame ptr
// | (0xFFF.. FF) |
// -1 | saved lr (x30) |
// |- - - - - - - - - - -|
// 0 | saved fp (x29) | <-- frame ptr
// |- - - - - - - - - - -|
// 1 | stack frame marker |
// | (ENTRY) |
......@@ -41,10 +38,8 @@ namespace internal {
// |- - - - - - - - - - -|
// 3 | C entry FP |
// |- - - - - - - - - - -|
// 4 | JS entry frame |
// 4 | JS entry frame | <-- stack ptr
// | marker |
// |- - - - - - - - - - -|
// 5 | padding | <-- stack ptr
// -----+---------------------+-----------------------
// TOP OF THE STACK LOWEST ADDRESS
//
......@@ -53,16 +48,15 @@ class EntryFrameConstants : public AllStatic {
// This is the offset to where JSEntry pushes the current value of
// Isolate::c_entry_fp onto the stack.
static constexpr int kCallerFPOffset = -3 * kSystemPointerSize;
static constexpr int kFixedFrameSize = 6 * kSystemPointerSize;
static constexpr int kFixedFrameSize = 4 * kSystemPointerSize;
// The following constants are defined so we can static-assert their values
// near the relevant JSEntry assembly code, not because they're actually very
// useful.
static constexpr int kCalleeSavedRegisterBytesPushedBeforeFpLrPair =
8 * kSystemPointerSize;
static constexpr int kCalleeSavedRegisterBytesPushedAfterFpLrPair =
10 * kSystemPointerSize;
static constexpr int kOffsetToCalleeSavedRegisters = 1 * kSystemPointerSize;
18 * kSystemPointerSize;
static constexpr int kCalleeSavedRegisterBytesPushedAfterFpLrPair = 0;
static constexpr int kOffsetToCalleeSavedRegisters = 0;
// These offsets refer to the immediate caller (a native frame), not to the
// previous JS exit frame like kCallerFPOffset above.
......
......@@ -42,29 +42,28 @@ void CheckCalleeSavedRegisters(const RegisterState& register_state) {}
#elif V8_TARGET_ARCH_ARM
// How much the JSEntry frame occupies in the stack.
constexpr int kJSEntryFrameSpace = 27;
constexpr int kJSEntryFrameSpace = 26;
// Offset where the FP, PC and SP live from the beginning of the JSEntryFrame.
constexpr int kFPOffset = 24;
constexpr int kPCOffset = 25;
constexpr int kSPOffset = 26;
constexpr int kFPOffset = 0;
constexpr int kPCOffset = 1;
constexpr int kSPOffset = 25;
// Builds the stack from {stack} as it is explained in frame-constants-arm.h.
void BuildJSEntryStack(uintptr_t* stack) {
stack[0] = -1; // the bad frame pointer (0xF..F)
stack[0] = reinterpret_cast<uintptr_t>(stack); // saved FP.
stack[1] = 100; // Return address into C++ code (i.e lr/pc)
// Set d8 = 150, d9 = 151, ..., d15 = 157.
for (int i = 0; i < 8; ++i) {
// Double registers occupy two slots. Therefore, upper bits are zeroed.
stack[1 + i * 2] = 0;
stack[1 + i * 2 + 1] = 150 + i;
stack[2 + i * 2] = 0;
stack[2 + i * 2 + 1] = 150 + i;
}
// Set r4 = 160, ..., r10 = 166.
for (int i = 0; i < 7; ++i) {
stack[17 + i] = 160 + i;
stack[18 + i] = 160 + i;
}
stack[24] = reinterpret_cast<uintptr_t>(stack + 24); // saved FP.
stack[25] = 100; // Return address into C++ code (i.e lr/pc)
stack[26] = reinterpret_cast<uintptr_t>(stack + 26); // saved SP.
stack[25] = reinterpret_cast<uintptr_t>(stack + 25); // saved SP.
}
// Checks that the values in the callee-saved registers are the same as the ones
......@@ -81,27 +80,26 @@ void CheckCalleeSavedRegisters(const RegisterState& register_state) {
#elif V8_TARGET_ARCH_ARM64
// How much the JSEntry frame occupies in the stack.
constexpr int kJSEntryFrameSpace = 22;
constexpr int kJSEntryFrameSpace = 21;
// Offset where the FP, PC and SP live from the beginning of the JSEntryFrame.
constexpr int kFPOffset = 11;
constexpr int kPCOffset = 12;
constexpr int kSPOffset = 21;
constexpr int kFPOffset = 0;
constexpr int kPCOffset = 1;
constexpr int kSPOffset = 20;
// Builds the stack from {stack} as it is explained in frame-constants-arm64.h.
void BuildJSEntryStack(uintptr_t* stack) {
stack[0] = -1; // the bad frame pointer (0xF..F)
stack[0] = reinterpret_cast<uintptr_t>(stack); // saved FP.
stack[1] = 100; // Return address into C++ code (i.e lr/pc)
// Set x19 = 150, ..., x28 = 159.
for (int i = 0; i < 10; ++i) {
stack[1 + i] = 150 + i;
stack[2 + i] = 150 + i;
}
stack[11] = reinterpret_cast<uintptr_t>(stack + 11); // saved FP.
stack[12] = 100; // Return address into C++ code (i.e lr/pc)
// Set d8 = 160, ..., d15 = 167.
for (int i = 0; i < 8; ++i) {
stack[13 + i] = 160 + i;
stack[12 + i] = 160 + i;
}
stack[21] = reinterpret_cast<uintptr_t>(stack + 21); // saved SP.
stack[20] = reinterpret_cast<uintptr_t>(stack + 20); // saved SP.
}
// Dummy method since we don't save callee saved registers in arm64.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment