Commit 12284536 authored by Jakob Gruber, committed by V8 LUCI CQ

[osr] Opportunistically keep deoptimized OSR code in the cache

.. when deoptimization occurs after the outermost loop containing the
loop that triggered OSR compilation. The reasoning is that the main
benefit of OSR'd code is speeding up the OSR'd loop; the speedup of
the OSR'd loop is assumed to be higher than deoptimization overhead.

This is a slightly modified version of crrev.com/c/3521361, credit
goes to tao.pan@intel.com for most of the investigation and
implementation work.

Bug: v8:12161
Change-Id: Ie729dd5d1df9c7f529a1cf1b9471bb60ce76c41a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3607988
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Jakob Linke <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80177}
parent 2f535cdf
@@ -490,6 +490,7 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() {
           .set_is_turbofanned()
           .set_stack_slots(frame()->GetTotalFrameSlotCount())
           .set_profiler_data(info()->profiler_data())
+          .set_osr_offset(info()->osr_offset())
           .TryBuild();

   Handle<Code> code;
@@ -533,6 +533,7 @@ Deoptimizer::Deoptimizer(Isolate* isolate, JSFunction function,
     DCHECK_EQ(0, offset % kLazyDeoptExitSize);
     deopt_exit_index_ = eager_deopt_count + (offset / kLazyDeoptExitSize);
   }
+  deopt_exit_bytecode_offset_ = deopt_data.GetBytecodeOffset(deopt_exit_index_);
 }

 Code Deoptimizer::FindOptimizedCode() {
@@ -641,8 +642,7 @@ int LookupCatchHandler(Isolate* isolate, TranslatedFrame* translated_frame,
 }  // namespace

-void Deoptimizer::TraceDeoptBegin(int optimization_id,
-                                  BytecodeOffset bytecode_offset) {
+void Deoptimizer::TraceDeoptBegin(int optimization_id) {
   DCHECK(tracing_enabled());
   FILE* file = trace_scope()->file();
   Deoptimizer::DeoptInfo info =
@@ -666,8 +666,9 @@ void Deoptimizer::TraceDeoptBegin(int optimization_id,
 #ifdef DEBUG
          info.node_id,
 #endif  // DEBUG
-         bytecode_offset.ToInt(), deopt_exit_index_, fp_to_sp_delta_,
-         caller_frame_top_, PointerAuthentication::StripPAC(from_));
+         deopt_exit_bytecode_offset_.ToInt(), deopt_exit_index_,
+         fp_to_sp_delta_, caller_frame_top_,
+         PointerAuthentication::StripPAC(from_));
   if (verbose_tracing_enabled() && deopt_kind_ != DeoptimizeKind::kLazy) {
     PrintF(file, "  ;;; deoptimize at ");
     OFStream outstr(file);
@@ -795,15 +796,13 @@ void Deoptimizer::DoComputeOutputFrames() {
   CHECK_GT(static_cast<uintptr_t>(caller_frame_top_),
            stack_guard->real_jslimit());

-  BytecodeOffset bytecode_offset =
-      input_data.GetBytecodeOffset(deopt_exit_index_);
   ByteArray translations = input_data.TranslationByteArray();
   unsigned translation_index =
       input_data.TranslationIndex(deopt_exit_index_).value();

   if (tracing_enabled()) {
     timer.Start();
-    TraceDeoptBegin(input_data.OptimizationId().value(), bytecode_offset);
+    TraceDeoptBegin(input_data.OptimizationId().value());
   }
   FILE* trace_file =
@@ -54,6 +54,9 @@ class Deoptimizer : public Malloced {
   Handle<JSFunction> function() const;
   Handle<Code> compiled_code() const;
   DeoptimizeKind deopt_kind() const { return deopt_kind_; }
+  BytecodeOffset deopt_exit_bytecode_offset() const {
+    return deopt_exit_bytecode_offset_;
+  }

   static Deoptimizer* New(Address raw_function, DeoptimizeKind kind,
                           Address from, int fp_to_sp_delta, Isolate* isolate);
@@ -180,7 +183,7 @@ class Deoptimizer : public Malloced {
   CodeTracer::Scope* verbose_trace_scope() const {
     return FLAG_trace_deopt_verbose ? trace_scope() : nullptr;
   }
-  void TraceDeoptBegin(int optimization_id, BytecodeOffset bytecode_offset);
+  void TraceDeoptBegin(int optimization_id);
   void TraceDeoptEnd(double deopt_duration);
 #ifdef DEBUG
   static void TraceFoundActivation(Isolate* isolate, JSFunction function);
@@ -192,6 +195,7 @@ class Deoptimizer : public Malloced {
   JSFunction function_;
   Code compiled_code_;
   unsigned deopt_exit_index_;
+  BytecodeOffset deopt_exit_bytecode_offset_ = BytecodeOffset::None();
   DeoptimizeKind deopt_kind_;
   Address from_;
   int fp_to_sp_delta_;
@@ -180,6 +180,7 @@ MaybeHandle<Code> Factory::CodeBuilder::BuildInternal(
   // this field. We currently assume it's immutable thus a relaxed read (after
   // passing IsPendingAllocation).
   raw_code.set_inlined_bytecode_size(inlined_bytecode_size_);
+  raw_code.set_osr_offset(osr_offset_);
   raw_code.set_code_data_container(*data_container, kReleaseStore);
   if (kind_ == CodeKind::BASELINE) {
     raw_code.set_bytecode_or_interpreter_data(*interpreter_data_);
@@ -916,6 +916,12 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
       return *this;
     }

+    CodeBuilder& set_osr_offset(BytecodeOffset offset) {
+      DCHECK_IMPLIES(!offset.IsNone(), CodeKindCanOSR(kind_));
+      osr_offset_ = offset;
+      return *this;
+    }
+
     CodeBuilder& set_source_position_table(Handle<ByteArray> table) {
       DCHECK_NE(kind_, CodeKind::BASELINE);
       DCHECK(!table.is_null());
@@ -993,6 +999,7 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
     MaybeHandle<Object> self_reference_;
     Builtin builtin_ = Builtin::kNoBuiltinId;
     uint32_t inlined_bytecode_size_ = 0;
+    BytecodeOffset osr_offset_ = BytecodeOffset::None();
     int32_t kind_specific_flags_ = 0;
     // Either source_position_table for non-baseline code
     // or bytecode_offset_table for baseline code.
@@ -729,6 +729,14 @@ void Code::set_inlined_bytecode_size(unsigned size) {
   RELAXED_WRITE_UINT_FIELD(*this, kInlinedBytecodeSizeOffset, size);
 }

+BytecodeOffset Code::osr_offset() const {
+  return BytecodeOffset(ReadField<int32_t>(kOsrOffsetOffset));
+}
+
+void Code::set_osr_offset(BytecodeOffset offset) {
+  WriteField<int32_t>(kOsrOffsetOffset, offset.ToInt());
+}
+
 bool Code::uses_safepoint_table() const {
   return is_turbofanned() || is_maglevved() || is_wasm_code();
 }
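For context: these accessors round-trip a BytecodeOffset through the raw int32 kOsrOffsetOffset header field, with BytecodeOffset::None() serving as the "no OSR" sentinel that Runtime_NotifyDeoptimized checks further down. A minimal standalone sketch of that round-trip — the -1 sentinel and the class body here are assumptions for illustration, not V8's exact definition:

    // Stand-in for V8's BytecodeOffset; the -1 sentinel is assumed for
    // illustration only.
    #include <cassert>
    #include <cstdint>

    class BytecodeOffset {
     public:
      explicit constexpr BytecodeOffset(int32_t id) : id_(id) {}
      static constexpr BytecodeOffset None() { return BytecodeOffset(-1); }
      constexpr bool IsNone() const { return id_ == -1; }
      constexpr int32_t ToInt() const { return id_; }

     private:
      int32_t id_;
    };

    int main() {
      // set_osr_offset stores offset.ToInt() into the int32 header field...
      int32_t raw_field = BytecodeOffset::None().ToInt();
      // ...and osr_offset() reconstructs the BytecodeOffset losslessly.
      assert(BytecodeOffset(raw_field).IsNone());
      raw_field = BytecodeOffset(42).ToInt();
      assert(!BytecodeOffset(raw_field).IsNone());
      assert(BytecodeOffset(raw_field).ToInt() == 42);
      return 0;
    }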
@@ -451,6 +451,9 @@ class Code : public HeapObject {
   inline unsigned inlined_bytecode_size() const;
   inline void set_inlined_bytecode_size(unsigned size);

+  inline BytecodeOffset osr_offset() const;
+  inline void set_osr_offset(BytecodeOffset offset);
+
   // [uses_safepoint_table]: Whether this Code object uses safepoint tables
   // (note the table may still be empty, see has_safepoint_table).
   inline bool uses_safepoint_table() const;
@@ -624,6 +627,7 @@ class Code : public HeapObject {
   V(kFlagsOffset, kInt32Size)                                                \
   V(kBuiltinIndexOffset, kIntSize)                                           \
   V(kInlinedBytecodeSizeOffset, kIntSize)                                    \
+  V(kOsrOffsetOffset, kInt32Size)                                            \
   /* Offsets describing inline metadata tables, relative to MetadataStart. */ \
   V(kHandlerTableOffsetOffset, kIntSize)                                     \
   V(kConstantPoolOffsetOffset,                                               \
@@ -643,28 +647,28 @@
 // due to padding for code alignment.
 #if V8_TARGET_ARCH_ARM64
 static constexpr int kHeaderPaddingSize =
-    V8_EXTERNAL_CODE_SPACE_BOOL ? 8 : (COMPRESS_POINTERS_BOOL ? 12 : 24);
+    V8_EXTERNAL_CODE_SPACE_BOOL ? 4 : (COMPRESS_POINTERS_BOOL ? 8 : 20);
 #elif V8_TARGET_ARCH_MIPS64
-static constexpr int kHeaderPaddingSize = 24;
+static constexpr int kHeaderPaddingSize = 20;
 #elif V8_TARGET_ARCH_LOONG64
-static constexpr int kHeaderPaddingSize = 24;
+static constexpr int kHeaderPaddingSize = 20;
 #elif V8_TARGET_ARCH_X64
 static constexpr int kHeaderPaddingSize =
-    V8_EXTERNAL_CODE_SPACE_BOOL ? 8 : (COMPRESS_POINTERS_BOOL ? 12 : 56);
+    V8_EXTERNAL_CODE_SPACE_BOOL ? 4 : (COMPRESS_POINTERS_BOOL ? 8 : 52);
 #elif V8_TARGET_ARCH_ARM
-static constexpr int kHeaderPaddingSize = 12;
+static constexpr int kHeaderPaddingSize = 8;
 #elif V8_TARGET_ARCH_IA32
-static constexpr int kHeaderPaddingSize = 12;
+static constexpr int kHeaderPaddingSize = 8;
 #elif V8_TARGET_ARCH_MIPS
-static constexpr int kHeaderPaddingSize = 12;
+static constexpr int kHeaderPaddingSize = 8;
 #elif V8_TARGET_ARCH_PPC64
 static constexpr int kHeaderPaddingSize =
-    FLAG_enable_embedded_constant_pool ? (COMPRESS_POINTERS_BOOL ? 8 : 52)
-                                       : (COMPRESS_POINTERS_BOOL ? 12 : 56);
+    FLAG_enable_embedded_constant_pool ? (COMPRESS_POINTERS_BOOL ? 4 : 48)
+                                       : (COMPRESS_POINTERS_BOOL ? 8 : 52);
 #elif V8_TARGET_ARCH_S390X
-static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 12 : 24;
+static constexpr int kHeaderPaddingSize = COMPRESS_POINTERS_BOOL ? 8 : 20;
 #elif V8_TARGET_ARCH_RISCV64
-static constexpr int kHeaderPaddingSize = (COMPRESS_POINTERS_BOOL ? 12 : 24);
+static constexpr int kHeaderPaddingSize = (COMPRESS_POINTERS_BOOL ? 8 : 20);
 #else
 #error Unknown architecture.
 #endif
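The kHeaderPaddingSize updates above follow a single invariant: the Code header is padded up to code alignment, so the 4-byte kOsrOffsetOffset field added to the offset list must be absorbed by shrinking each configuration's padding by exactly kInt32Size. A small self-contained consistency check over the before/after values in this hunk — the table merely restates the diff, it is not V8 code:

    constexpr int kInt32Size = 4;  // size of the new kOsrOffsetOffset field

    struct PaddingChange { int before, after; };

    // Padding values copied from the hunk above, one entry per distinct pair.
    constexpr PaddingChange kChanges[] = {
        {8, 4},    // arm64/x64 external code space; ppc64 const pool+compress
        {12, 8},   // arm, ia32, mips; compressed-pointer 64-bit configs
        {24, 20},  // mips64, loong64; uncompressed s390x/riscv64/arm64
        {52, 48},  // ppc64 with embedded constant pool, no compression
        {56, 52},  // x64/ppc64 without pointer compression
    };

    constexpr bool AllAbsorbNewField() {
      for (const PaddingChange& c : kChanges) {
        if (c.before - c.after != kInt32Size) return false;
      }
      return true;
    }

    static_assert(AllAbsorbNewField(),
                  "every padding value must shrink by the new field's size");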
@@ -172,6 +172,51 @@ RUNTIME_FUNCTION(Runtime_InstantiateAsmJs) {
   return Smi::zero();
 }

+namespace {
+
+// Whether the deopt exit is contained by the outermost loop containing the
+// osr'd loop. For example:
+//
+//  for (;;) {
+//    for (;;) {
+//    }  // OSR is triggered on this backedge.
+//  }  // This is the outermost loop containing the osr'd loop.
+bool DeoptExitIsInsideOsrLoop(Isolate* isolate, JSFunction function,
+                              BytecodeOffset deopt_exit_offset,
+                              BytecodeOffset osr_offset) {
+  DisallowGarbageCollection no_gc;
+  DCHECK(!deopt_exit_offset.IsNone());
+  DCHECK(!osr_offset.IsNone());
+
+  interpreter::BytecodeArrayIterator it(
+      handle(function.shared().GetBytecodeArray(isolate), isolate),
+      osr_offset.ToInt());
+  DCHECK_EQ(it.current_bytecode(), interpreter::Bytecode::kJumpLoop);
+
+  for (; !it.done(); it.Advance()) {
+    const int current_offset = it.current_offset();
+    // If we've reached the deopt exit, it's contained in the current loop
+    // (this is covered by IsInRange below, but this check lets us avoid
+    // useless iteration).
+    if (current_offset == deopt_exit_offset.ToInt()) return true;
+    // We're only interested in loop ranges.
+    if (it.current_bytecode() != interpreter::Bytecode::kJumpLoop) continue;
+    // Is the deopt exit contained in the current loop?
+    if (base::IsInRange(deopt_exit_offset.ToInt(), it.GetJumpTargetOffset(),
+                        current_offset)) {
+      return true;
+    }
+    // We've reached nesting level 0, i.e. the current JumpLoop concludes a
+    // top-level loop.
+    const int loop_nesting_level = it.GetImmediateOperand(1);
+    if (loop_nesting_level == 0) return false;
+  }

+  UNREACHABLE();
+}
+
+}  // namespace
+
 RUNTIME_FUNCTION(Runtime_NotifyDeoptimized) {
   HandleScope scope(isolate);
   DCHECK_EQ(0, args.length());
@@ -186,7 +231,7 @@ RUNTIME_FUNCTION(Runtime_NotifyDeoptimized) {
   // For OSR the optimized code isn't installed on the function, so get the
   // code object from deoptimizer.
   Handle<Code> optimized_code = deoptimizer->compiled_code();
-  DeoptimizeKind type = deoptimizer->deopt_kind();
+  const DeoptimizeKind deopt_kind = deoptimizer->deopt_kind();

   // TODO(turbofan): We currently need the native context to materialize
   // the arguments object, but only to get to its map.
@@ -194,6 +239,8 @@ RUNTIME_FUNCTION(Runtime_NotifyDeoptimized) {
   // Make sure to materialize objects before causing any allocation.
   deoptimizer->MaterializeHeapObjects();
+  const BytecodeOffset deopt_exit_offset =
+      deoptimizer->deopt_exit_bytecode_offset();
   delete deoptimizer;

   // Ensure the context register is updated for materialized objects.
@@ -201,8 +248,25 @@ RUNTIME_FUNCTION(Runtime_NotifyDeoptimized) {
   JavaScriptFrame* top_frame = top_it.frame();
   isolate->set_context(Context::cast(top_frame->context()));

-  // Invalidate the underlying optimized code on eager deopts.
-  if (type == DeoptimizeKind::kEager) {
+  // Lazy deopts don't invalidate the underlying optimized code since the code
+  // object itself is still valid (as far as we know); the called function
+  // caused the deopt, not the function we're currently looking at.
+  if (deopt_kind == DeoptimizeKind::kLazy) {
+    return ReadOnlyRoots(isolate).undefined_value();
+  }
+
+  // Non-OSR'd code is deoptimized unconditionally.
+  //
+  // For OSR'd code, we keep the optimized code around if deoptimization occurs
+  // outside the outermost loop containing the loop that triggered OSR
+  // compilation. The reasoning is that OSR is intended to speed up the
+  // long-running loop; so if the deoptimization occurs outside this loop it is
+  // still worth jumping to the OSR'd code on the next run. The reduced cost of
+  // the loop should pay for the deoptimization costs.
+  const BytecodeOffset osr_offset = optimized_code->osr_offset();
+  if (osr_offset.IsNone() ||
+      DeoptExitIsInsideOsrLoop(isolate, *function, deopt_exit_offset,
+                               osr_offset)) {
     Deoptimizer::DeoptimizeFunction(*function, *optimized_code);
   }
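For readers outside the V8 tree, here is a self-contained sketch of the walk DeoptExitIsInsideOsrLoop performs, over a simplified bytecode representation. The Insn struct and the offsets in main are invented for illustration; only the logic — the early hit, the inclusive [jump target, JumpLoop offset] range test, and stopping once a nesting-level-0 JumpLoop closes the outermost loop — mirrors the function added above:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Insn {
      int offset;                 // bytecode offset of this instruction
      bool is_jump_loop = false;  // true for the JumpLoop backedge bytecode
      int jump_target = 0;        // loop header offset (JumpLoop only)
      int nesting_level = 0;      // JumpLoop only; 0 closes a top-level loop
    };

    bool DeoptExitIsInsideOsrLoop(const std::vector<Insn>& code, int osr_offset,
                                  int deopt_exit_offset) {
      // Start at the JumpLoop whose backedge triggered OSR, then scan forward.
      size_t i = 0;
      while (code[i].offset != osr_offset) ++i;
      assert(code[i].is_jump_loop);
      for (; i < code.size(); ++i) {
        const Insn& insn = code[i];
        // Shortcut: walking onto the deopt exit means it is inside some
        // enclosing loop we haven't closed yet.
        if (insn.offset == deopt_exit_offset) return true;
        if (!insn.is_jump_loop) continue;
        // [jump_target, offset] is this loop's source range; inclusive test,
        // like base::IsInRange in the real function.
        if (insn.jump_target <= deopt_exit_offset &&
            deopt_exit_offset <= insn.offset) {
          return true;
        }
        // Nesting level 0 closes the outermost enclosing loop; anything past
        // this point is outside it, so the OSR'd code can be kept.
        if (insn.nesting_level == 0) return false;
      }
      return false;  // unreachable for well-formed bytecode
    }

    int main() {
      // for (;;) {            // header at 0, JumpLoop at 30, nesting level 0
      //   for (;;) {}         // header at 10, JumpLoop at 20, nesting level 1
      // }                     // OSR was triggered on the inner backedge (20).
      const std::vector<Insn> code = {
          {0}, {10}, {20, true, 10, 1}, {25}, {30, true, 0, 0}, {40}};
      assert(DeoptExitIsInsideOsrLoop(code, 20, 25));   // inside the outer loop
      assert(!DeoptExitIsInsideOsrLoop(code, 20, 40));  // after it: keep OSR code
      return 0;
    }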