Commit 0f1fbfbe authored by Jakob Gruber, committed by V8 LUCI CQ

[osr] Refactor TieringManager::MaybeOptimizeFrame

This started out as a minor code move of early-osr logic, but
became a more general refactor of the tiering decisions.

Early-OSR: the intent here is to trigger OSR as soon as possible
when matching OSR'd code is cached. Move this out of ShouldOptimize
(since it has side effects), and into a dedicated function that's
called early in the decision process.

Note that with this change, we no longer trigger normal TF optimization
along with the OSR request - TF tiering heuristics are already complex
enough, let's not add yet another special case right now.

Other refactors:

- Clarify terminology around OSR. None of the functions in TM actually
  perform OSR; instead, they only increase the OSR urgency, effectively
  increasing the set of loops that will trigger OSR compilation.
- Clarify the control flow through the tiering decisions. Notably,
  we only increment OSR urgency when normal tierup has previously been
  requested. Also, there is a bytecode size limit involved. These
  conditions were previously hidden inside other functions.

Bug: v8:12161
Change-Id: I8f58b4332bd9851c6b299655ce840555fb7efa92
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3529448
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79512}
parent 4557c3f4
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "src/handles/global-handles.h" #include "src/handles/global-handles.h"
#include "src/init/bootstrapper.h" #include "src/init/bootstrapper.h"
#include "src/interpreter/interpreter.h" #include "src/interpreter/interpreter.h"
#include "src/objects/code.h"
#include "src/tracing/trace-event.h" #include "src/tracing/trace-event.h"
namespace v8 { namespace v8 {
...@@ -148,33 +149,34 @@ void TieringManager::Optimize(JSFunction function, CodeKind code_kind, ...@@ -148,33 +149,34 @@ void TieringManager::Optimize(JSFunction function, CodeKind code_kind,
function.MarkForOptimization(isolate_, d.code_kind, d.concurrency_mode); function.MarkForOptimization(isolate_, d.code_kind, d.concurrency_mode);
} }
void TieringManager::AttemptOnStackReplacement(UnoptimizedFrame* frame, namespace {
int loop_nesting_levels) {
JSFunction function = frame->function();
SharedFunctionInfo shared = function.shared();
if (!FLAG_use_osr || !shared.IsUserJavaScript()) {
return;
}
// If the code is not optimizable, don't try OSR.
if (shared.optimization_disabled()) return;
// We're using on-stack replacement: Store new loop nesting level in bool HaveCachedOSRCodeForCurrentBytecodeOffset(UnoptimizedFrame* frame,
// BytecodeArray header so that certain back edges in any interpreter frame int* osr_urgency_out) {
// for this bytecode will trigger on-stack replacement for that frame. JSFunction function = frame->function();
if (FLAG_trace_osr) { BytecodeArray bytecode = frame->GetBytecodeArray();
CodeTracer::Scope scope(isolate_->GetCodeTracer()); const int bytecode_offset = frame->GetBytecodeOffset();
PrintF(scope.file(), "[OSR - arming back edges in "); if (V8_UNLIKELY(function.shared().osr_code_cache_state() != kNotCached)) {
function.PrintName(scope.file()); OSROptimizedCodeCache cache =
PrintF(scope.file(), "]\n"); function.context().native_context().GetOSROptimizedCodeCache();
interpreter::BytecodeArrayIterator iterator(
handle(bytecode, frame->isolate()));
for (int jump_offset : cache.GetBytecodeOffsetsFromSFI(function.shared())) {
iterator.SetOffset(jump_offset);
if (base::IsInRange(bytecode_offset, iterator.GetJumpTargetOffset(),
jump_offset)) {
int loop_depth = iterator.GetImmediateOperand(1);
// `+ 1` because osr_urgency is an exclusive upper limit on the depth.
*osr_urgency_out = loop_depth + 1;
return true;
}
}
} }
return false;
DCHECK(frame->is_unoptimized());
const int urgency = frame->GetBytecodeArray().osr_urgency();
frame->GetBytecodeArray().set_osr_urgency(
std::min({urgency + loop_nesting_levels, BytecodeArray::kMaxOsrUrgency}));
} }
} // namespace
namespace { namespace {
bool TiersUpToMaglev(CodeKind code_kind) { bool TiersUpToMaglev(CodeKind code_kind) {
...@@ -209,15 +211,80 @@ int TieringManager::InitialInterruptBudget() { ...@@ -209,15 +211,80 @@ int TieringManager::InitialInterruptBudget() {
: FLAG_interrupt_budget; : FLAG_interrupt_budget;
} }
namespace {
bool SmallEnoughForOSR(Isolate* isolate, JSFunction function) {
return function.shared().GetBytecodeArray(isolate).length() <=
kOSRBytecodeSizeAllowanceBase +
function.feedback_vector().profiler_ticks() *
kOSRBytecodeSizeAllowancePerTick;
}
void TrySetOsrUrgency(Isolate* isolate, JSFunction function, int osr_urgency) {
SharedFunctionInfo shared = function.shared();
if (V8_UNLIKELY(!FLAG_use_osr)) return;
if (V8_UNLIKELY(!shared.IsUserJavaScript())) return;
if (V8_UNLIKELY(shared.optimization_disabled())) return;
// We've passed all checks - bump the OSR urgency.
if (V8_UNLIKELY(FLAG_trace_osr)) {
CodeTracer::Scope scope(isolate->GetCodeTracer());
PrintF(scope.file(), "[OSR - arming back edges in ");
function.PrintName(scope.file());
PrintF(scope.file(), "]\n");
}
BytecodeArray bytecode = shared.GetBytecodeArray(isolate);
DCHECK_GE(osr_urgency, bytecode.osr_urgency()); // Never lower urgency here.
bytecode.set_osr_urgency(osr_urgency);
}
void TryIncrementOsrUrgency(Isolate* isolate, JSFunction function) {
int old_urgency = function.shared().GetBytecodeArray(isolate).osr_urgency();
int new_urgency = std::min(old_urgency + 1, BytecodeArray::kMaxOsrUrgency);
TrySetOsrUrgency(isolate, function, new_urgency);
}
void TryRequestOsrAtNextOpportunity(Isolate* isolate, JSFunction function) {
TrySetOsrUrgency(isolate, function, BytecodeArray::kMaxOsrUrgency);
}
void TryRequestOsrForCachedOsrCode(Isolate* isolate, JSFunction function,
int osr_urgency_for_cached_osr_code) {
DCHECK_LE(osr_urgency_for_cached_osr_code, BytecodeArray::kMaxOsrUrgency);
int old_urgency = function.shared().GetBytecodeArray(isolate).osr_urgency();
// Make sure not to decrease the existing urgency.
int new_urgency = std::max(old_urgency, osr_urgency_for_cached_osr_code);
TrySetOsrUrgency(isolate, function, new_urgency);
}
bool ShouldOptimizeAsSmallFunction(int bytecode_size, bool any_ic_changed) {
return !any_ic_changed &&
bytecode_size < FLAG_max_bytecode_size_for_early_opt;
}
} // namespace
void TieringManager::RequestOsrAtNextOpportunity(JSFunction function) {
DisallowGarbageCollection no_gc;
TryRequestOsrAtNextOpportunity(isolate_, function);
}
void TieringManager::MaybeOptimizeFrame(JSFunction function, void TieringManager::MaybeOptimizeFrame(JSFunction function,
JavaScriptFrame* frame, UnoptimizedFrame* frame,
CodeKind code_kind) { CodeKind code_kind) {
if (function.IsInOptimizationQueue()) { const OptimizationMarker opt_marker =
function.feedback_vector().optimization_marker();
if (V8_UNLIKELY(opt_marker == OptimizationMarker::kInOptimizationQueue)) {
// Note: This effectively disables OSR for the function while it is being
// compiled.
TraceInOptimizationQueue(function); TraceInOptimizationQueue(function);
return; return;
} }
if (FLAG_testing_d8_test_runner && if (V8_UNLIKELY(FLAG_testing_d8_test_runner) &&
!PendingOptimizationTable::IsHeuristicOptimizationAllowed(isolate_, !PendingOptimizationTable::IsHeuristicOptimizationAllowed(isolate_,
function)) { function)) {
TraceHeuristicOptimizationDisallowed(function); TraceHeuristicOptimizationDisallowed(function);
...@@ -225,46 +292,41 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function, ...@@ -225,46 +292,41 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
} }
// TODO(v8:7700): Consider splitting this up for Maglev/Turbofan. // TODO(v8:7700): Consider splitting this up for Maglev/Turbofan.
if (function.shared().optimization_disabled()) return; if (V8_UNLIKELY(function.shared().optimization_disabled())) return;
if (frame->is_unoptimized()) { if (V8_UNLIKELY(FLAG_always_osr)) {
if (V8_UNLIKELY(FLAG_always_osr)) { TryRequestOsrAtNextOpportunity(isolate_, function);
AttemptOnStackReplacement(UnoptimizedFrame::cast(frame), // Continue below and do a normal optimized compile as well.
BytecodeArray::kMaxOsrUrgency);
// Fall through and do a normal optimized compile as well.
} else if (MaybeOSR(function, UnoptimizedFrame::cast(frame))) {
return;
}
} }
OptimizationDecision d = ShouldOptimize(function, code_kind, frame); // If we have matching cached OSR'd code, request OSR at the next opportunity.
if (d.should_optimize()) Optimize(function, code_kind, d); int osr_urgency_for_cached_osr_code;
} if (HaveCachedOSRCodeForCurrentBytecodeOffset(
frame, &osr_urgency_for_cached_osr_code)) {
TryRequestOsrForCachedOsrCode(isolate_, function,
osr_urgency_for_cached_osr_code);
}
bool TieringManager::MaybeOSR(JSFunction function, UnoptimizedFrame* frame) { const bool is_marked_for_any_optimization =
int ticks = function.feedback_vector().profiler_ticks(); (static_cast<uint32_t>(opt_marker) & kNoneOrInOptimizationQueueMask) != 0;
if (function.IsMarkedForOptimization() || if (is_marked_for_any_optimization || function.HasAvailableOptimizedCode()) {
function.IsMarkedForConcurrentOptimization() || // OSR kicks in only once we've previously decided to tier up, but we are
function.HasAvailableOptimizedCode()) { // still in the unoptimized frame (this implies a long-running loop).
int64_t allowance = kOSRBytecodeSizeAllowanceBase + if (SmallEnoughForOSR(isolate_, function)) {
ticks * kOSRBytecodeSizeAllowancePerTick; TryIncrementOsrUrgency(isolate_, function);
if (function.shared().GetBytecodeArray(isolate_).length() <= allowance) {
AttemptOnStackReplacement(frame);
} }
return true;
}
return false;
}
namespace { // Return unconditionally and don't run through the optimization decision
// again; we've already decided to tier up previously.
return;
}
bool ShouldOptimizeAsSmallFunction(int bytecode_size, bool any_ic_changed) { DCHECK(!is_marked_for_any_optimization &&
return !any_ic_changed && !function.HasAvailableOptimizedCode());
bytecode_size < FLAG_max_bytecode_size_for_early_opt; OptimizationDecision d = ShouldOptimize(function, code_kind, frame);
if (d.should_optimize()) Optimize(function, code_kind, d);
} }
} // namespace
OptimizationDecision TieringManager::ShouldOptimize(JSFunction function, OptimizationDecision TieringManager::ShouldOptimize(JSFunction function,
CodeKind code_kind, CodeKind code_kind,
JavaScriptFrame* frame) { JavaScriptFrame* frame) {
...@@ -277,31 +339,7 @@ OptimizationDecision TieringManager::ShouldOptimize(JSFunction function, ...@@ -277,31 +339,7 @@ OptimizationDecision TieringManager::ShouldOptimize(JSFunction function,
return OptimizationDecision::DoNotOptimize(); return OptimizationDecision::DoNotOptimize();
} }
// If function's SFI has OSR cache, once enter loop range of OSR cache, set
// OSR loop nesting level for matching condition of OSR (loop_depth <
// osr_level), soon later OSR will be triggered when executing bytecode
// JumpLoop which is entry of the OSR cache, then hit the OSR cache.
BytecodeArray bytecode = function.shared().GetBytecodeArray(isolate_); BytecodeArray bytecode = function.shared().GetBytecodeArray(isolate_);
if (V8_UNLIKELY(function.shared().osr_code_cache_state() > kNotCached) &&
frame->is_unoptimized()) {
int current_offset =
static_cast<UnoptimizedFrame*>(frame)->GetBytecodeOffset();
OSROptimizedCodeCache cache =
function.context().native_context().GetOSROptimizedCodeCache();
std::vector<int> bytecode_offsets =
cache.GetBytecodeOffsetsFromSFI(function.shared());
interpreter::BytecodeArrayIterator iterator(
Handle<BytecodeArray>(bytecode, isolate_));
for (int jump_offset : bytecode_offsets) {
iterator.SetOffset(jump_offset);
int jump_target_offset = iterator.GetJumpTargetOffset();
if (jump_offset >= current_offset &&
current_offset >= jump_target_offset) {
bytecode.set_osr_urgency(iterator.GetImmediateOperand(1) + 1);
return OptimizationDecision::TurbofanHotAndStable();
}
}
}
const int ticks = function.feedback_vector().profiler_ticks(); const int ticks = function.feedback_vector().profiler_ticks();
const int ticks_for_optimization = const int ticks_for_optimization =
FLAG_ticks_before_optimization + FLAG_ticks_before_optimization +
...@@ -324,6 +362,7 @@ OptimizationDecision TieringManager::ShouldOptimize(JSFunction function, ...@@ -324,6 +362,7 @@ OptimizationDecision TieringManager::ShouldOptimize(JSFunction function,
bytecode.length(), FLAG_max_bytecode_size_for_early_opt); bytecode.length(), FLAG_max_bytecode_size_for_early_opt);
} }
} }
return OptimizationDecision::DoNotOptimize(); return OptimizationDecision::DoNotOptimize();
} }
...@@ -402,9 +441,9 @@ void TieringManager::OnInterruptTick(Handle<JSFunction> function) { ...@@ -402,9 +441,9 @@ void TieringManager::OnInterruptTick(Handle<JSFunction> function) {
function_obj.feedback_vector().SaturatingIncrementProfilerTicks(); function_obj.feedback_vector().SaturatingIncrementProfilerTicks();
JavaScriptFrameIterator it(isolate_); JavaScriptFrameIterator it(isolate_);
DCHECK(it.frame()->is_unoptimized()); UnoptimizedFrame* frame = UnoptimizedFrame::cast(it.frame());
const CodeKind code_kind = function_obj.GetActiveTier().value(); const CodeKind code_kind = function_obj.GetActiveTier().value();
MaybeOptimizeFrame(function_obj, it.frame(), code_kind); MaybeOptimizeFrame(function_obj, frame, code_kind);
} }
} // namespace internal } // namespace internal
......
...@@ -32,8 +32,8 @@ class TieringManager { ...@@ -32,8 +32,8 @@ class TieringManager {
void NotifyICChanged() { any_ic_changed_ = true; } void NotifyICChanged() { any_ic_changed_ = true; }
void AttemptOnStackReplacement(UnoptimizedFrame* frame, // After this request, the next JumpLoop will perform OSR.
int nesting_levels = 1); void RequestOsrAtNextOpportunity(JSFunction function);
// For use when a JSFunction is available. // For use when a JSFunction is available.
static int InterruptBudgetFor(Isolate* isolate, JSFunction function); static int InterruptBudgetFor(Isolate* isolate, JSFunction function);
...@@ -43,12 +43,10 @@ class TieringManager { ...@@ -43,12 +43,10 @@ class TieringManager {
private: private:
// Make the decision whether to optimize the given function, and mark it for // Make the decision whether to optimize the given function, and mark it for
// optimization if the decision was 'yes'. // optimization if the decision was 'yes'.
void MaybeOptimizeFrame(JSFunction function, JavaScriptFrame* frame, // This function is also responsible for bumping the OSR urgency.
void MaybeOptimizeFrame(JSFunction function, UnoptimizedFrame* frame,
CodeKind code_kind); CodeKind code_kind);
// Potentially attempts OSR from and returns whether no other
// optimization attempts should be made.
bool MaybeOSR(JSFunction function, UnoptimizedFrame* frame);
OptimizationDecision ShouldOptimize(JSFunction function, CodeKind code_kind, OptimizationDecision ShouldOptimize(JSFunction function, CodeKind code_kind,
JavaScriptFrame* frame); JavaScriptFrame* frame);
void Optimize(JSFunction function, CodeKind code_kind, void Optimize(JSFunction function, CodeKind code_kind,
......
...@@ -12,15 +12,13 @@ ...@@ -12,15 +12,13 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
// This enum are states that how many OSR code caches belong to a SFI. Without // This enum is a performance optimization for accessing the OSR code cache -
// this enum, need to check all OSR code cache entries to know whether a // we can skip cache iteration in many cases unless there are multiple entries
// JSFunction's SFI has OSR code cache. The enum value kCachedMultiple is for // for a particular SharedFunctionInfo.
// doing time-consuming loop check only when the very unlikely state change
// kCachedMultiple -> { kCachedOnce | kCachedMultiple }.
enum OSRCodeCacheStateOfSFI : uint8_t { enum OSRCodeCacheStateOfSFI : uint8_t {
kNotCached, // Likely state, no OSR code cache kNotCached, // Likely state.
kCachedOnce, // Unlikely state, one OSR code cache kCachedOnce, // Unlikely state, one entry.
kCachedMultiple, // Very unlikely state, multiple OSR code caches kCachedMultiple, // Very unlikely state, multiple entries.
}; };
class V8_EXPORT OSROptimizedCodeCache : public WeakFixedArray { class V8_EXPORT OSROptimizedCodeCache : public WeakFixedArray {
......
...@@ -581,10 +581,8 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) { ...@@ -581,10 +581,8 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
function->MarkForOptimization(isolate, CodeKind::TURBOFAN, function->MarkForOptimization(isolate, CodeKind::TURBOFAN,
ConcurrencyMode::kNotConcurrent); ConcurrencyMode::kNotConcurrent);
// Make the profiler arm all back edges in unoptimized code.
if (it.frame()->is_unoptimized()) { if (it.frame()->is_unoptimized()) {
isolate->tiering_manager()->AttemptOnStackReplacement( isolate->tiering_manager()->RequestOsrAtNextOpportunity(*function);
UnoptimizedFrame::cast(it.frame()), BytecodeArray::kMaxOsrUrgency);
} }
return ReadOnlyRoots(isolate).undefined_value(); return ReadOnlyRoots(isolate).undefined_value();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment