Commit 305aa12f authored by Anton Bikineev's avatar Anton Bikineev Committed by V8 LUCI CQ

cppgc: Save xmm registers on the stack

Microsoft x86_64 ABI considers XMM6-XMM15 as non-volatile
(callee-saved), which means that the compiler can store pointers in them.
We need to make sure they are pushed onto the stack inside the stack
scanning trampolines.

Bug: v8:11710
Change-Id: Ida804fe49d3d3b6f179ec276903a42ec8d3d86be
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2865745
Commit-Queue: Anton Bikineev <bikineev@chromium.org>
Auto-Submit: Anton Bikineev <bikineev@chromium.org>
Reviewed-by: 's avatarMichael Lippautz <mlippautz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74376}
parent f6a3ef56
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#ifdef _WIN64 #ifdef _WIN64
// We maintain 16-byte alignment at calls. There is an 8-byte return address // We maintain 16-byte alignment at calls. There is an 8-byte return address
// on the stack and we push 72 bytes which maintains 16-byte stack alignment // on the stack and we push 232 bytes which maintains 16-byte stack alignment
// at the call. // at the call.
// Source: https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention // Source: https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
asm(".globl PushAllRegistersAndIterateStack \n" asm(".globl PushAllRegistersAndIterateStack \n"
...@@ -36,6 +36,18 @@ asm(".globl PushAllRegistersAndIterateStack \n" ...@@ -36,6 +36,18 @@ asm(".globl PushAllRegistersAndIterateStack \n"
" push %r13 \n" " push %r13 \n"
" push %r14 \n" " push %r14 \n"
" push %r15 \n" " push %r15 \n"
" sub $160, %rsp \n"
// Use aligned instrs as we are certain that the stack is properly aligned.
" movdqa %xmm6, 144(%rsp) \n"
" movdqa %xmm7, 128(%rsp) \n"
" movdqa %xmm8, 112(%rsp) \n"
" movdqa %xmm9, 96(%rsp) \n"
" movdqa %xmm10, 80(%rsp) \n"
" movdqa %xmm11, 64(%rsp) \n"
" movdqa %xmm12, 48(%rsp) \n"
" movdqa %xmm13, 32(%rsp) \n"
" movdqa %xmm14, 16(%rsp) \n"
" movdqa %xmm15, (%rsp) \n"
// Pass 1st parameter (rcx) unchanged (Stack*). // Pass 1st parameter (rcx) unchanged (Stack*).
// Pass 2nd parameter (rdx) unchanged (StackVisitor*). // Pass 2nd parameter (rdx) unchanged (StackVisitor*).
// Save 3rd parameter (r8; IterateStackCallback) // Save 3rd parameter (r8; IterateStackCallback)
...@@ -45,7 +57,7 @@ asm(".globl PushAllRegistersAndIterateStack \n" ...@@ -45,7 +57,7 @@ asm(".globl PushAllRegistersAndIterateStack \n"
// Call the callback. // Call the callback.
" call *%r9 \n" " call *%r9 \n"
// Pop the callee-saved registers. // Pop the callee-saved registers.
" add $64, %rsp \n" " add $224, %rsp \n"
// Restore rbp as it was used as frame pointer. // Restore rbp as it was used as frame pointer.
" pop %rbp \n" " pop %rbp \n"
" ret \n"); " ret \n");
......
...@@ -13,8 +13,8 @@ PushAllRegistersAndIterateStack: ...@@ -13,8 +13,8 @@ PushAllRegistersAndIterateStack:
;; stack scanning. ;; stack scanning.
;; ;;
;; We maintain 16-byte alignment at calls. There is an 8-byte return address ;; We maintain 16-byte alignment at calls. There is an 8-byte return address
;; on the stack and we push 72 bytes which maintains 16-byte stack alignment ;; on the stack and we push 232 bytes which maintains 16-byte stack
;; at the call. ;; alignment at the call.
;; Source: https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention ;; Source: https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
;; ;;
;; rbp is callee-saved. Maintain proper frame pointer for debugging. ;; rbp is callee-saved. Maintain proper frame pointer for debugging.
...@@ -28,6 +28,18 @@ PushAllRegistersAndIterateStack: ...@@ -28,6 +28,18 @@ PushAllRegistersAndIterateStack:
push r13 push r13
push r14 push r14
push r15 push r15
sub rsp, 160
;; Use aligned instrs as we are certain that the stack is properly aligned.
movdqa xmmword ptr [rsp + 144], xmm6
movdqa xmmword ptr [rsp + 128], xmm7
movdqa xmmword ptr [rsp + 112], xmm8
movdqa xmmword ptr [rsp + 96], xmm9
movdqa xmmword ptr [rsp + 80], xmm10
movdqa xmmword ptr [rsp + 64], xmm11
movdqa xmmword ptr [rsp + 48], xmm12
movdqa xmmword ptr [rsp + 32], xmm13
movdqa xmmword ptr [rsp + 16], xmm14
movdqa xmmword ptr [rsp], xmm15
;; Pass 1st parameter (rcx) unchanged (Stack*). ;; Pass 1st parameter (rcx) unchanged (Stack*).
;; Pass 2nd parameter (rdx) unchanged (StackVisitor*). ;; Pass 2nd parameter (rdx) unchanged (StackVisitor*).
;; Save 3rd parameter (r8; IterateStackCallback) ;; Save 3rd parameter (r8; IterateStackCallback)
...@@ -37,7 +49,7 @@ PushAllRegistersAndIterateStack: ...@@ -37,7 +49,7 @@ PushAllRegistersAndIterateStack:
;; Call the callback. ;; Call the callback.
call r9 call r9
;; Pop the callee-saved registers. ;; Pop the callee-saved registers.
add rsp, 64 add rsp, 224
;; Restore rbp as it was used as frame pointer. ;; Restore rbp as it was used as frame pointer.
pop rbp pop rbp
ret ret
......
...@@ -271,8 +271,29 @@ TEST_F(GCStackTest, IteratePointersFindsParameterNesting8) { ...@@ -271,8 +271,29 @@ TEST_F(GCStackTest, IteratePointersFindsParameterNesting8) {
} }
#endif // !_MSC_VER || __clang__ #endif // !_MSC_VER || __clang__
// The following test uses inline assembly and has been checked to work on clang namespace {
// to verify that the stack-scanning trampoline pushes callee-saved registers. // We manually call into this function from inline assembly. Therefore we need
// to make sure that:
// 1) there is no .plt indirection (i.e. visibility is hidden);
// 2) stack is realigned in the function prologue.
V8_NOINLINE
#if !defined(V8_OS_WIN)
__attribute__((visibility("hidden")))
#endif
#ifdef __has_attribute
#if __has_attribute(force_align_arg_pointer)
__attribute__((force_align_arg_pointer))
#endif
#endif
__attribute__((used)) extern "C" void
IteratePointersNoMangling(Stack* stack, StackVisitor* visitor) {
stack->IteratePointers(visitor);
}
} // namespace
// The following tests use inline assembly and have been checked to work on
// clang to verify that the stack-scanning trampoline pushes callee-saved
// registers.
// //
// The test uses a macro loop as asm() can only be passed string literals. // The test uses a macro loop as asm() can only be passed string literals.
#ifdef __clang__ #ifdef __clang__
...@@ -305,26 +326,6 @@ TEST_F(GCStackTest, IteratePointersFindsParameterNesting8) { ...@@ -305,26 +326,6 @@ TEST_F(GCStackTest, IteratePointersFindsParameterNesting8) {
#ifdef FOR_ALL_CALLEE_SAVED_REGS #ifdef FOR_ALL_CALLEE_SAVED_REGS
namespace {
// We manually call into this function from inline assembly. Therefore we need
// to make sure that:
// 1) there is no .plt indirection (i.e. visibility is hidden);
// 2) stack is realigned in the function prologue.
V8_NOINLINE
#if !defined(V8_OS_WIN)
__attribute__((visibility("hidden")))
#endif
#ifdef __has_attribute
#if __has_attribute(force_align_arg_pointer)
__attribute__((force_align_arg_pointer))
#endif
#endif
extern "C" void
IteratePointersNoMangling(Stack* stack, StackVisitor* visitor) {
stack->IteratePointers(visitor);
}
} // namespace
TEST_F(GCStackTest, IteratePointersFindsCalleeSavedRegisters) { TEST_F(GCStackTest, IteratePointersFindsCalleeSavedRegisters) {
auto scanner = std::make_unique<StackScanner>(); auto scanner = std::make_unique<StackScanner>();
...@@ -377,6 +378,81 @@ TEST_F(GCStackTest, IteratePointersFindsCalleeSavedRegisters) { ...@@ -377,6 +378,81 @@ TEST_F(GCStackTest, IteratePointersFindsCalleeSavedRegisters) {
} }
#endif // FOR_ALL_CALLEE_SAVED_REGS #endif // FOR_ALL_CALLEE_SAVED_REGS
#if defined(__clang__) && defined(V8_TARGET_ARCH_X64) && defined(V8_OS_WIN)
#define FOR_ALL_XMM_CALLEE_SAVED_REGS(V) \
V("xmm6") \
V("xmm7") \
V("xmm8") \
V("xmm9") \
V("xmm10") \
V("xmm11") \
V("xmm12") \
V("xmm13") \
V("xmm14") \
V("xmm15")
TEST_F(GCStackTest, IteratePointersFindsCalleeSavedXMMRegisters) {
auto scanner = std::make_unique<StackScanner>();
// No check that the needle is initially not found as on some platforms it
// may be part of temporaries after setting it up through StackScanner.
// First, clear all callee-saved xmm registers.
#define CLEAR_REGISTER(reg) asm("pxor %%" reg ", %%" reg : : : reg);
FOR_ALL_XMM_CALLEE_SAVED_REGS(CLEAR_REGISTER)
#undef CLEAR_REGISTER
// Keep local raw pointers to keep instruction sequences small below.
auto* local_stack = GetStack();
auto* local_scanner = scanner.get();
// Moves |local_scanner->needle()| into a callee-saved register, leaving the
// callee-saved register as the only register referencing the needle.
// (Ignoring implementation-dependent dirty registers/stack.)
#define KEEP_ALIVE_FROM_CALLEE_SAVED(reg) \
local_scanner->Reset(); \
[local_stack, local_scanner]() V8_NOINLINE { MOVE_TO_REG_AND_CALL(reg) }(); \
EXPECT_TRUE(local_scanner->found()) \
<< "pointer in callee-saved xmm register not found. register: " << reg \
<< std::endl;
// First, test the pointer in the low quadword.
#define MOVE_TO_REG_AND_CALL(reg) \
asm volatile("mov %0, %%rax \n movq %%rax, %%" reg \
"\n mov %1, %%rcx \n mov %2, %%rdx" \
"\n call %P3" \
"\n pxor %%" reg ", %%" reg \
: \
: "r"(local_scanner->needle()), "r"(local_stack), \
"r"(local_scanner), "i"(IteratePointersNoMangling) \
: "memory", "rax", reg, "rcx", "rdx", "cc");
FOR_ALL_XMM_CALLEE_SAVED_REGS(KEEP_ALIVE_FROM_CALLEE_SAVED)
#undef MOVE_TO_REG_AND_CALL
// Then, test the pointer in the upper quadword.
#define MOVE_TO_REG_AND_CALL(reg) \
asm volatile("mov %0, %%rax \n movq %%rax, %%" reg \
"\n pshufd $0b01001110, %%" reg ", %%" reg \
"\n mov %1, %%rcx \n mov %2, %%rdx" \
"\n call %P3" \
"\n pxor %%" reg ", %%" reg \
: \
: "r"(local_scanner->needle()), "r"(local_stack), \
"r"(local_scanner), "i"(IteratePointersNoMangling) \
: "memory", "rax", reg, "rcx", "rdx", "cc");
FOR_ALL_XMM_CALLEE_SAVED_REGS(KEEP_ALIVE_FROM_CALLEE_SAVED)
#undef MOVE_TO_REG_AND_CALL
#undef KEEP_ALIVE_FROM_CALLEE_SAVED
#undef FOR_ALL_XMM_CALLEE_SAVED_REGS
}
#endif // defined(__clang__) && defined(V8_TARGET_ARCH_X64) &&
// defined(V8_OS_WIN)
#if V8_OS_LINUX && (V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64) #if V8_OS_LINUX && (V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64)
class CheckStackAlignmentVisitor final : public StackVisitor { class CheckStackAlignmentVisitor final : public StackVisitor {
public: public:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment