Commit 76d47dec authored by Clemens Backes, committed by V8 LUCI CQ

[shadow stack] Drop frames from the shadow stack for exception handling

For low-cost exception handling, it's important to be able to quickly
drop frames from the stack until reaching the exception handler.
Intel's shadow stack support offers an instruction for doing this without
violating shadow stack discipline: incsspq, which drops N values from the
shadow stack.

This CL integrates that instruction for v8 exception handling.

Bug: v8:11246
Change-Id: I908f0ab8bb3de6c36e6078e27b65132287328f2d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3289637
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Michael Lippautz <mlippautz@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#78469}
parent 60539905
......@@ -499,6 +499,9 @@ CPU::CPU()
has_avx_ = (cpu_info[2] & 0x10000000) != 0;
has_avx2_ = (cpu_info7[1] & 0x00000020) != 0;
has_fma3_ = (cpu_info[2] & 0x00001000) != 0;
// CET shadow stack feature flag. See
// https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
has_cetss_ = (cpu_info7[2] & 0x00000080) != 0;
// "Hypervisor Present Bit: Bit 31 of ECX of CPUID leaf 0x1."
// See https://lwn.net/Articles/301888/
// This is checking for any hypervisor. Hypervisors may choose not to
......
......@@ -101,6 +101,7 @@ class V8_BASE_EXPORT CPU final {
bool has_lzcnt() const { return has_lzcnt_; }
bool has_popcnt() const { return has_popcnt_; }
bool is_atom() const { return is_atom_; }
// Whether the CET shadow stack feature bit was reported by CPUID when this
// CPU object was constructed.
bool has_cetss() const { return has_cetss_; }
bool has_non_stop_time_stamp_counter() const {
return has_non_stop_time_stamp_counter_;
}
......@@ -159,6 +160,7 @@ class V8_BASE_EXPORT CPU final {
bool has_sse41_;
bool has_sse42_;
bool is_atom_;
bool has_cetss_;
bool has_osxsave_;
bool has_avx_;
bool has_avx2_;
......
......@@ -3981,6 +3981,10 @@ void Builtins::Generate_CEntry(MacroAssembler* masm, int result_size,
IsolateAddressId::kPendingHandlerFPAddress, masm->isolate());
ExternalReference pending_handler_sp_address = ExternalReference::Create(
IsolateAddressId::kPendingHandlerSPAddress, masm->isolate());
ExternalReference num_frames_above_pending_handler_address =
ExternalReference::Create(
IsolateAddressId::kNumFramesAbovePendingHandlerAddress,
masm->isolate());
// Ask the runtime for help to determine the handler. This will set rax to
// contain the current pending exception, don't clobber it.
......@@ -3994,6 +3998,12 @@ void Builtins::Generate_CEntry(MacroAssembler* masm, int result_size,
__ PrepareCallCFunction(3);
__ CallCFunction(find_handler, 3);
}
// Drop frames from the shadow stack.
__ movq(rcx, masm->ExternalReferenceAsOperand(
num_frames_above_pending_handler_address));
__ IncsspqIfSupported(rcx, kScratchRegister);
// Retrieve the handler context, SP and FP.
__ movq(rsi,
masm->ExternalReferenceAsOperand(pending_handler_context_address));
......
......@@ -224,6 +224,7 @@ CpuFeatureScope::~CpuFeatureScope() {
bool CpuFeatures::initialized_ = false;
bool CpuFeatures::supports_wasm_simd_128_ = false;
bool CpuFeatures::supports_cetss_ = false;
unsigned CpuFeatures::supported_ = 0;
unsigned CpuFeatures::icache_line_size_ = 0;
unsigned CpuFeatures::dcache_line_size_ = 0;
......
......@@ -27,6 +27,7 @@ enum CpuFeature {
LZCNT,
POPCNT,
INTEL_ATOM,
CETSS,
#elif V8_TARGET_ARCH_ARM
// - Standard configurations. The baseline is ARMv6+VFPv2.
......@@ -147,6 +148,7 @@ class V8_EXPORT_PRIVATE CpuFeatures : public AllStatic {
// at runtime in builtins using an extern ref. Other callers should use
// CpuFeatures::SupportWasmSimd128().
static bool supports_wasm_simd_128_;
// Mirrors IsSupported(CETSS) after probing. Kept as a plain static so that
// its address can be handed out via ExternalReference::supports_cetss_address()
// and read at runtime by generated code.
static bool supports_cetss_;
};
} // namespace internal
......
......@@ -660,6 +660,11 @@ ExternalReference ExternalReference::address_of_wasm_int32_overflow_as_float() {
reinterpret_cast<Address>(&wasm_int32_overflow_as_float));
}
// Returns an external reference to the static CpuFeatures::supports_cetss_
// flag, so that generated code can load the flag at runtime.
ExternalReference ExternalReference::supports_cetss_address() {
  const Address flag_address =
      reinterpret_cast<Address>(&CpuFeatures::supports_cetss_);
  return ExternalReference(flag_address);
}
ExternalReference
ExternalReference::address_of_enable_experimental_regexp_engine() {
return ExternalReference(&FLAG_enable_experimental_regexp_engine);
......
......@@ -249,6 +249,7 @@ class StatsCounter;
V(address_of_wasm_int32_max_as_double, "wasm_int32_max_as_double") \
V(address_of_wasm_uint32_max_as_double, "wasm_uint32_max_as_double") \
V(address_of_wasm_int32_overflow_as_float, "wasm_int32_overflow_as_float") \
V(supports_cetss_address, "CpuFeatures::supports_cetss_address") \
V(write_barrier_marking_from_code_function, "WriteBarrier::MarkingFromCode") \
V(call_enqueue_microtask_function, "MicrotaskQueue::CallEnqueueMicrotask") \
V(call_enter_context_function, "call_enter_context_function") \
......
......@@ -131,7 +131,11 @@ void CpuFeatures::ProbeImpl(bool cross_compile) {
// at runtime in builtins using an extern ref. Other callers should use
// CpuFeatures::SupportWasmSimd128().
CpuFeatures::supports_wasm_simd_128_ = CpuFeatures::SupportsWasmSimd128();
#endif // V8_HOST_ARCH_X64
if (cpu.has_cetss()) SetSupported(CETSS);
// The static variable is used for codegen of certain CETSS instructions.
CpuFeatures::supports_cetss_ = IsSupported(CETSS);
#endif // V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
}
// Has an empty body here, so calling it prints nothing.
void CpuFeatures::PrintTarget() {}
......@@ -2114,6 +2118,15 @@ void Assembler::pushfq() {
emit(0x9C);
}
// Emits INCSSPQ with a register operand. The byte sequence written is
// F3, REX.W (plus the register's high bit), 0F, AE, then a register-direct
// ModR/M byte whose reg field is 5 and whose rm field names the operand.
// NOTE(review): per the Intel SDM the instruction only consumes bits 7:0 of
// {number_of_words}; callers are responsible for larger counts.
void Assembler::incsspq(Register number_of_words) {
  constexpr int kMandatoryPrefix = 0xF3;
  constexpr int kTwoByteEscape = 0x0F;
  constexpr int kOpcode = 0xAE;
  // mod = 0b11 (register direct), reg = 0b101 (/5 opcode extension).
  constexpr int kModRMRegisterDirectExt5 = 0xE8;

  EnsureSpace ensure_space(this);
  emit(kMandatoryPrefix);
  emit_rex_64(number_of_words);
  emit(kTwoByteEscape);
  emit(kOpcode);
  emit(kModRMRegisterDirectExt5 | number_of_words.low_bits());
}
void Assembler::ret(int imm16) {
EnsureSpace ensure_space(this);
DCHECK(is_uint16(imm16));
......
......@@ -544,6 +544,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void popq(Register dst);
void popq(Operand dst);
// Emits incsspq, which drops {number_of_words} values from the shadow stack.
void incsspq(Register number_of_words);
void leave();
// Moves
......
......@@ -2287,6 +2287,23 @@ void TurboAssembler::Ret(int bytes_dropped, Register scratch) {
}
}
// Drops {number_of_words} entries from the shadow stack, guarded by a runtime
// check of CpuFeatures::supports_cetss_ so that nothing is executed on CPUs
// without CET shadow stack support.
//
// The hardware instruction only consumes bits 7:0 of its operand, so counts
// above 255 are dropped in chunks of at most 255 words; a single incsspq
// would silently drop only (count & 0xFF) entries and leave the shadow stack
// out of sync with the real stack.
//
// {number_of_words} is preserved (saved and restored through one slot of the
// regular stack, so rsp must be usable here). {scratch} is clobbered and must
// differ from {number_of_words}.
void TurboAssembler::IncsspqIfSupported(Register number_of_words,
                                        Register scratch) {
  // Optimized code can validate at runtime whether the cpu supports the
  // incsspq instruction, so it shouldn't use this method.
  CHECK(isolate()->IsGeneratingEmbeddedBuiltins());
  DCHECK_NE(number_of_words, scratch);

  // Largest count a single incsspq can express (operand bits 7:0).
  constexpr int kMaxWordsPerIncsspq = 0xFF;

  Label not_supported, drop_chunk, drop_remainder;
  ExternalReference supports_cetss =
      ExternalReference::supports_cetss_address();
  Operand supports_cetss_operand =
      ExternalReferenceAsOperand(supports_cetss, scratch);
  cmpb(supports_cetss_operand, Immediate(0));
  j(equal, &not_supported, Label::kNear);

  // {scratch} is free again from here on; the flag has been consumed.
  // Keep the caller's count intact while it is used as the loop counter.
  pushq(number_of_words);

  bind(&drop_chunk);
  // Unsigned comparison: the count is a uintptr_t.
  cmpq(number_of_words, Immediate(kMaxWordsPerIncsspq));
  j(below_equal, &drop_remainder, Label::kNear);
  movl(scratch, Immediate(kMaxWordsPerIncsspq));
  incsspq(scratch);
  subq(number_of_words, Immediate(kMaxWordsPerIncsspq));
  jmp(&drop_chunk, Label::kNear);

  bind(&drop_remainder);
  // At most 255 words remain, which fits the 8-bit operand.
  incsspq(number_of_words);

  popq(number_of_words);
  bind(&not_supported);
}
void MacroAssembler::CmpObjectType(Register heap_object, InstanceType type,
Register map) {
LoadMap(map, heap_object);
......
......@@ -67,6 +67,11 @@ class V8_EXPORT_PRIVATE TurboAssembler
void Ret();
// Call incsspq with {number_of_words} only if the cpu supports it; support
// is tested at runtime in the emitted code. {scratch} is used to address the
// support flag (it may be clobbered) and must differ from {number_of_words}.
// May only be used while generating embedded builtins.
// NOTE: This shouldn't be embedded in optimized code, since the check
// for CPU support would be redundant (we could check at compile time).
void IncsspqIfSupported(Register number_of_words, Register scratch);
// Return and drop arguments from stack, where the number of arguments
// may be bigger than 2^16 - 1. Requires a scratch register.
void Ret(int bytes_dropped, Register scratch);
......
......@@ -1729,18 +1729,19 @@ enum class BlockingBehavior { kBlock, kDontBlock };
enum class ConcurrencyMode { kNotConcurrent, kConcurrent };
#define FOR_EACH_ISOLATE_ADDRESS_NAME(C) \
C(Handler, handler) \
C(CEntryFP, c_entry_fp) \
C(CFunction, c_function) \
C(Context, context) \
C(PendingException, pending_exception) \
C(PendingHandlerContext, pending_handler_context) \
C(PendingHandlerEntrypoint, pending_handler_entrypoint) \
C(PendingHandlerConstantPool, pending_handler_constant_pool) \
C(PendingHandlerFP, pending_handler_fp) \
C(PendingHandlerSP, pending_handler_sp) \
C(ExternalCaughtException, external_caught_exception) \
#define FOR_EACH_ISOLATE_ADDRESS_NAME(C) \
C(Handler, handler) \
C(CEntryFP, c_entry_fp) \
C(CFunction, c_function) \
C(Context, context) \
C(PendingException, pending_exception) \
C(PendingHandlerContext, pending_handler_context) \
C(PendingHandlerEntrypoint, pending_handler_entrypoint) \
C(PendingHandlerConstantPool, pending_handler_constant_pool) \
C(PendingHandlerFP, pending_handler_fp) \
C(PendingHandlerSP, pending_handler_sp) \
C(NumFramesAbovePendingHandler, num_frames_above_pending_handler) \
C(ExternalCaughtException, external_caught_exception) \
C(JSEntrySP, js_entry_sp)
enum IsolateAddressId {
......
......@@ -2001,6 +2001,10 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
current += 2;
} else if (opcode == 0xE6) {
current += PrintOperands("cvtdq2pd", XMMREG_XMMOPER_OP_ORDER, current);
} else if (opcode == 0xAE) {
// incssp[d|q]
AppendToBuffer("incssp%c ", operand_size_code());
current += PrintRightOperand(current);
} else {
UnimplementedInstruction();
}
......
......@@ -1772,7 +1772,7 @@ Object Isolate::UnwindAndFindHandler() {
auto FoundHandler = [&](Context context, Address instruction_start,
intptr_t handler_offset,
Address constant_pool_address, Address handler_sp,
Address handler_fp) {
Address handler_fp, int num_frames_above_handler) {
// Store information to be consumed by the CEntry.
thread_local_top()->pending_handler_context_ = context;
thread_local_top()->pending_handler_entrypoint_ =
......@@ -1780,6 +1780,8 @@ Object Isolate::UnwindAndFindHandler() {
thread_local_top()->pending_handler_constant_pool_ = constant_pool_address;
thread_local_top()->pending_handler_fp_ = handler_fp;
thread_local_top()->pending_handler_sp_ = handler_sp;
thread_local_top()->num_frames_above_pending_handler_ =
num_frames_above_handler;
// Return and clear pending exception. The contract is that:
// (1) the pending exception is stored in one place (no duplication), and
......@@ -1794,10 +1796,11 @@ Object Isolate::UnwindAndFindHandler() {
// Special handling of termination exceptions, uncatchable by JavaScript and
// Wasm code, we unwind the handlers until the top ENTRY handler is found.
bool catchable_by_js = is_catchable_by_javascript(exception);
int visited_frames = 0;
// Compute handler and stack unwinding information by performing a full walk
// over the stack and dispatching according to the frame type.
for (StackFrameIterator iter(this);; iter.Advance()) {
for (StackFrameIterator iter(this);; iter.Advance(), visited_frames++) {
// Handler must exist.
DCHECK(!iter.done());
......@@ -1818,7 +1821,7 @@ Object Isolate::UnwindAndFindHandler() {
return FoundHandler(Context(), code.InstructionStart(this, frame->pc()),
table.LookupReturn(0), code.constant_pool(),
handler->address() + StackHandlerConstants::kSize,
0);
0, visited_frames);
}
#if V8_ENABLE_WEBASSEMBLY
......@@ -1837,7 +1840,8 @@ Object Isolate::UnwindAndFindHandler() {
StandardFrameConstants::kFixedFrameSizeAboveFp -
code.stack_slots() * kSystemPointerSize;
return FoundHandler(Context(), instruction_start, handler_offset,
code.constant_pool(), return_sp, frame->fp());
code.constant_pool(), return_sp, frame->fp(),
visited_frames);
}
case StackFrame::WASM: {
......@@ -1865,7 +1869,8 @@ Object Isolate::UnwindAndFindHandler() {
// destructors have been executed.
set_thread_in_wasm_flag_scope.Enable();
return FoundHandler(Context(), wasm_code->instruction_start(), offset,
wasm_code->constant_pool(), return_sp, frame->fp());
wasm_code->constant_pool(), return_sp, frame->fp(),
visited_frames);
}
case StackFrame::WASM_COMPILE_LAZY: {
......@@ -1902,7 +1907,7 @@ Object Isolate::UnwindAndFindHandler() {
return FoundHandler(Context(), code.InstructionStart(this, frame->pc()),
offset, code.constant_pool(), return_sp,
frame->fp());
frame->fp(), visited_frames);
}
case StackFrame::STUB: {
......@@ -1930,7 +1935,7 @@ Object Isolate::UnwindAndFindHandler() {
return FoundHandler(Context(), code.InstructionStart(this, frame->pc()),
offset, code.constant_pool(), return_sp,
frame->fp());
frame->fp(), visited_frames);
}
case StackFrame::INTERPRETED:
......@@ -1970,15 +1975,24 @@ Object Isolate::UnwindAndFindHandler() {
sp_frame->PatchContext(context);
return FoundHandler(
Context(), code.InstructionStart(this, sp_frame->sp()), pc_offset,
code.constant_pool(), return_sp, sp_frame->fp());
code.constant_pool(), return_sp, sp_frame->fp(), visited_frames);
} else {
InterpretedFrame::cast(js_frame)->PatchBytecodeOffset(
static_cast<int>(offset));
Code code =
FromCodeT(builtins()->code(Builtin::kInterpreterEnterAtBytecode));
// We subtract a frame from visited_frames because otherwise the
// shadow stack will drop the underlying interpreter entry trampoline
// in which the handler runs.
//
// An interpreted frame cannot be the first frame we look at
// because at a minimum, an exit frame into C++ has to separate
// it and the context in which this C++ code runs.
CHECK_GE(visited_frames, 1);
return FoundHandler(context, code.InstructionStart(), 0,
code.constant_pool(), return_sp, frame->fp());
code.constant_pool(), return_sp, frame->fp(),
visited_frames - 1);
}
}
......@@ -2001,7 +2015,8 @@ Object Isolate::UnwindAndFindHandler() {
Address return_sp = js_frame->fp() - js_frame->GetSPToFPDelta();
Code code = js_frame->LookupCode();
return FoundHandler(Context(), code.InstructionStart(), 0,
code.constant_pool(), return_sp, frame->fp());
code.constant_pool(), return_sp, frame->fp(),
visited_frames);
}
default:
......
......@@ -745,6 +745,7 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory {
THREAD_LOCAL_TOP_ADDRESS(Address, pending_handler_constant_pool)
THREAD_LOCAL_TOP_ADDRESS(Address, pending_handler_fp)
THREAD_LOCAL_TOP_ADDRESS(Address, pending_handler_sp)
THREAD_LOCAL_TOP_ADDRESS(uintptr_t, num_frames_above_pending_handler)
THREAD_LOCAL_TOP_ACCESSOR(bool, external_caught_exception)
......
......@@ -19,6 +19,7 @@ void ThreadLocalTop::Clear() {
pending_handler_constant_pool_ = kNullAddress;
pending_handler_fp_ = kNullAddress;
pending_handler_sp_ = kNullAddress;
num_frames_above_pending_handler_ = 0;
last_api_entry_ = kNullAddress;
pending_message_ = Object();
rethrowing_message_ = false;
......
......@@ -35,9 +35,9 @@ class ThreadLocalTop {
// refactor this to really consist of just Addresses and 32-bit
// integer fields.
#ifdef V8_ENABLE_CONSERVATIVE_STACK_SCANNING
static constexpr uint32_t kSizeInBytes = 26 * kSystemPointerSize;
static constexpr uint32_t kSizeInBytes = 27 * kSystemPointerSize;
#else
static constexpr uint32_t kSizeInBytes = 25 * kSystemPointerSize;
static constexpr uint32_t kSizeInBytes = 26 * kSystemPointerSize;
#endif
// Does early low-level initialization that does not depend on the
......@@ -118,6 +118,7 @@ class ThreadLocalTop {
Address pending_handler_constant_pool_;
Address pending_handler_fp_;
Address pending_handler_sp_;
uintptr_t num_frames_above_pending_handler_;
Address last_api_entry_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment