Commit 0f1fbfbe authored by Jakob Gruber, committed by V8 LUCI CQ

[osr] Refactor TieringManager::MaybeOptimizeFrame

This started out as a minor code move of the early-OSR logic, but
became a more general refactor of the tiering decisions.

Early-OSR: the intent here is to trigger OSR as soon as possible
when matching OSR'd code is cached. Move this out of ShouldOptimize
(since it has side effects), and into a dedicated function that's
called early in the decision process.

Note that with this change, we no longer trigger normal TF optimization
along with the OSR request - TF tiering heuristics are already complex
enough; let's not add yet another special case right now.
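
To make the early-OSR behavior concrete, here is a small standalone C++
toy model (not V8 code; all names here, e.g. kToyMaxOsrUrgency and
RequestOsrForCachedCode, are illustrative stand-ins). It sketches the
relation documented in the diff below: osr_urgency is an exclusive upper
limit on the loop depth, so cached OSR code guarding a loop at depth d
becomes eligible once the urgency reaches d + 1, and nothing else - in
particular no regular TF compile - is requested:

  // Toy model (not V8 code): osr_urgency acts as an exclusive upper bound on
  // the loop depth that may trigger OSR, i.e. a back edge at depth d fires
  // OSR iff d < urgency (inferred from the "+ 1 ... exclusive upper limit"
  // comment in the diff). Raising urgency widens the set of eligible loops.
  #include <algorithm>
  #include <cstdio>

  constexpr int kToyMaxOsrUrgency = 6;  // illustrative cap only

  struct ToyBytecode {
    int osr_urgency = 0;
  };

  // Raise (never lower) the urgency so that cached OSR code guarding a loop
  // at `cached_loop_depth` becomes eligible at its next back edge. This is
  // the only action taken: no regular Turbofan compile is requested.
  void RequestOsrForCachedCode(ToyBytecode& bytecode, int cached_loop_depth) {
    const int needed = cached_loop_depth + 1;  // "+ 1": exclusive upper bound.
    bytecode.osr_urgency =
        std::min(std::max(bytecode.osr_urgency, needed), kToyMaxOsrUrgency);
  }

  int main() {
    ToyBytecode bytecode;
    RequestOsrForCachedCode(bytecode, /*cached_loop_depth=*/2);
    for (int depth = 0; depth < 4; ++depth) {
      std::printf("loop depth %d triggers OSR: %s\n", depth,
                  depth < bytecode.osr_urgency ? "yes" : "no");
    }
    return 0;
  }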

Other refactors:

- Clarify terminology around OSR. None of the functions in TM actually
  perform OSR; instead, they only increase the OSR urgency, effectively
  increasing the set of loops that will trigger OSR compilation.
- Clarify the control flow through the tiering decisions. Notably,
  we only increment OSR urgency when normal tierup has previously been
  requested. Also, there is a bytecode size limit involved. These
  conditions were previously hidden inside other functions (see the
  sketch after this list).
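
The new decision order is easiest to see as code. The following condensed
standalone C++ sketch is a toy model only, with illustrative names and
thresholds; the real helpers are SmallEnoughForOSR, TryIncrementOsrUrgency,
TryRequestOsrForCachedOsrCode and ShouldOptimize in the diff below:

  // Toy model (not V8 code) of the refactored decision order.
  #include <algorithm>

  constexpr int kToyMaxOsrUrgency = 6;
  constexpr int kToySizeAllowanceBase = 120;    // illustrative threshold
  constexpr int kToySizeAllowancePerTick = 40;  // illustrative threshold

  struct ToyFunction {
    bool in_optimization_queue = false;
    bool optimization_disabled = false;
    bool tierup_already_requested = false;  // marked or has optimized code
    bool has_cached_osr_code = false;
    int cached_osr_loop_depth = 0;
    int bytecode_length = 0;
    int profiler_ticks = 0;
    int osr_urgency = 0;
  };

  bool SmallEnoughForOsr(const ToyFunction& f) {
    return f.bytecode_length <=
           kToySizeAllowanceBase + f.profiler_ticks * kToySizeAllowancePerTick;
  }

  void MaybeOptimizeFrame(ToyFunction& f) {
    if (f.in_optimization_queue) return;  // OSR effectively disabled meanwhile.
    if (f.optimization_disabled) return;

    // Early-OSR: matching cached OSR code bumps urgency right away.
    if (f.has_cached_osr_code) {
      f.osr_urgency =
          std::min(std::max(f.osr_urgency, f.cached_osr_loop_depth + 1),
                   kToyMaxOsrUrgency);
    }

    if (f.tierup_already_requested) {
      // Tier-up was requested earlier but we are still in unoptimized code,
      // which implies a long-running loop: widen the set of OSR-able loops,
      // subject to the bytecode size limit.
      if (SmallEnoughForOsr(f)) {
        f.osr_urgency = std::min(f.osr_urgency + 1, kToyMaxOsrUrgency);
      }
      return;  // Don't run the regular optimization decision again.
    }

    // Otherwise fall through to the regular decision (ShouldOptimize /
    // Optimize in the real code); omitted here.
  }

  int main() { ToyFunction f; MaybeOptimizeFrame(f); return 0; }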

Bug: v8:12161
Change-Id: I8f58b4332bd9851c6b299655ce840555fb7efa92
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3529448
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79512}
parent 4557c3f4
......@@ -17,6 +17,7 @@
#include "src/handles/global-handles.h"
#include "src/init/bootstrapper.h"
#include "src/interpreter/interpreter.h"
#include "src/objects/code.h"
#include "src/tracing/trace-event.h"
namespace v8 {
......@@ -148,33 +149,34 @@ void TieringManager::Optimize(JSFunction function, CodeKind code_kind,
function.MarkForOptimization(isolate_, d.code_kind, d.concurrency_mode);
}
void TieringManager::AttemptOnStackReplacement(UnoptimizedFrame* frame,
int loop_nesting_levels) {
JSFunction function = frame->function();
SharedFunctionInfo shared = function.shared();
if (!FLAG_use_osr || !shared.IsUserJavaScript()) {
return;
}
// If the code is not optimizable, don't try OSR.
if (shared.optimization_disabled()) return;
namespace {
// We're using on-stack replacement: Store new loop nesting level in
// BytecodeArray header so that certain back edges in any interpreter frame
// for this bytecode will trigger on-stack replacement for that frame.
if (FLAG_trace_osr) {
CodeTracer::Scope scope(isolate_->GetCodeTracer());
PrintF(scope.file(), "[OSR - arming back edges in ");
function.PrintName(scope.file());
PrintF(scope.file(), "]\n");
bool HaveCachedOSRCodeForCurrentBytecodeOffset(UnoptimizedFrame* frame,
int* osr_urgency_out) {
JSFunction function = frame->function();
BytecodeArray bytecode = frame->GetBytecodeArray();
const int bytecode_offset = frame->GetBytecodeOffset();
if (V8_UNLIKELY(function.shared().osr_code_cache_state() != kNotCached)) {
OSROptimizedCodeCache cache =
function.context().native_context().GetOSROptimizedCodeCache();
interpreter::BytecodeArrayIterator iterator(
handle(bytecode, frame->isolate()));
for (int jump_offset : cache.GetBytecodeOffsetsFromSFI(function.shared())) {
iterator.SetOffset(jump_offset);
if (base::IsInRange(bytecode_offset, iterator.GetJumpTargetOffset(),
jump_offset)) {
int loop_depth = iterator.GetImmediateOperand(1);
// `+ 1` because osr_urgency is an exclusive upper limit on the depth.
*osr_urgency_out = loop_depth + 1;
return true;
}
}
}
DCHECK(frame->is_unoptimized());
const int urgency = frame->GetBytecodeArray().osr_urgency();
frame->GetBytecodeArray().set_osr_urgency(
std::min({urgency + loop_nesting_levels, BytecodeArray::kMaxOsrUrgency}));
return false;
}
} // namespace
namespace {
bool TiersUpToMaglev(CodeKind code_kind) {
......@@ -209,15 +211,80 @@ int TieringManager::InitialInterruptBudget() {
: FLAG_interrupt_budget;
}
namespace {
bool SmallEnoughForOSR(Isolate* isolate, JSFunction function) {
return function.shared().GetBytecodeArray(isolate).length() <=
kOSRBytecodeSizeAllowanceBase +
function.feedback_vector().profiler_ticks() *
kOSRBytecodeSizeAllowancePerTick;
}
void TrySetOsrUrgency(Isolate* isolate, JSFunction function, int osr_urgency) {
SharedFunctionInfo shared = function.shared();
if (V8_UNLIKELY(!FLAG_use_osr)) return;
if (V8_UNLIKELY(!shared.IsUserJavaScript())) return;
if (V8_UNLIKELY(shared.optimization_disabled())) return;
// We've passed all checks - bump the OSR urgency.
if (V8_UNLIKELY(FLAG_trace_osr)) {
CodeTracer::Scope scope(isolate->GetCodeTracer());
PrintF(scope.file(), "[OSR - arming back edges in ");
function.PrintName(scope.file());
PrintF(scope.file(), "]\n");
}
BytecodeArray bytecode = shared.GetBytecodeArray(isolate);
DCHECK_GE(osr_urgency, bytecode.osr_urgency()); // Never lower urgency here.
bytecode.set_osr_urgency(osr_urgency);
}
void TryIncrementOsrUrgency(Isolate* isolate, JSFunction function) {
int old_urgency = function.shared().GetBytecodeArray(isolate).osr_urgency();
int new_urgency = std::min(old_urgency + 1, BytecodeArray::kMaxOsrUrgency);
TrySetOsrUrgency(isolate, function, new_urgency);
}
void TryRequestOsrAtNextOpportunity(Isolate* isolate, JSFunction function) {
TrySetOsrUrgency(isolate, function, BytecodeArray::kMaxOsrUrgency);
}
void TryRequestOsrForCachedOsrCode(Isolate* isolate, JSFunction function,
int osr_urgency_for_cached_osr_code) {
DCHECK_LE(osr_urgency_for_cached_osr_code, BytecodeArray::kMaxOsrUrgency);
int old_urgency = function.shared().GetBytecodeArray(isolate).osr_urgency();
// Make sure not to decrease the existing urgency.
int new_urgency = std::max(old_urgency, osr_urgency_for_cached_osr_code);
TrySetOsrUrgency(isolate, function, new_urgency);
}
bool ShouldOptimizeAsSmallFunction(int bytecode_size, bool any_ic_changed) {
return !any_ic_changed &&
bytecode_size < FLAG_max_bytecode_size_for_early_opt;
}
} // namespace
void TieringManager::RequestOsrAtNextOpportunity(JSFunction function) {
DisallowGarbageCollection no_gc;
TryRequestOsrAtNextOpportunity(isolate_, function);
}
void TieringManager::MaybeOptimizeFrame(JSFunction function,
JavaScriptFrame* frame,
UnoptimizedFrame* frame,
CodeKind code_kind) {
if (function.IsInOptimizationQueue()) {
const OptimizationMarker opt_marker =
function.feedback_vector().optimization_marker();
if (V8_UNLIKELY(opt_marker == OptimizationMarker::kInOptimizationQueue)) {
// Note: This effectively disables OSR for the function while it is being
// compiled.
TraceInOptimizationQueue(function);
return;
}
if (FLAG_testing_d8_test_runner &&
if (V8_UNLIKELY(FLAG_testing_d8_test_runner) &&
!PendingOptimizationTable::IsHeuristicOptimizationAllowed(isolate_,
function)) {
TraceHeuristicOptimizationDisallowed(function);
......@@ -225,46 +292,41 @@ void TieringManager::MaybeOptimizeFrame(JSFunction function,
}
// TODO(v8:7700): Consider splitting this up for Maglev/Turbofan.
if (function.shared().optimization_disabled()) return;
if (frame->is_unoptimized()) {
if (V8_UNLIKELY(FLAG_always_osr)) {
AttemptOnStackReplacement(UnoptimizedFrame::cast(frame),
BytecodeArray::kMaxOsrUrgency);
// Fall through and do a normal optimized compile as well.
} else if (MaybeOSR(function, UnoptimizedFrame::cast(frame))) {
return;
}
if (V8_UNLIKELY(function.shared().optimization_disabled())) return;
if (V8_UNLIKELY(FLAG_always_osr)) {
TryRequestOsrAtNextOpportunity(isolate_, function);
// Continue below and do a normal optimized compile as well.
}
OptimizationDecision d = ShouldOptimize(function, code_kind, frame);
if (d.should_optimize()) Optimize(function, code_kind, d);
}
// If we have matching cached OSR'd code, request OSR at the next opportunity.
int osr_urgency_for_cached_osr_code;
if (HaveCachedOSRCodeForCurrentBytecodeOffset(
frame, &osr_urgency_for_cached_osr_code)) {
TryRequestOsrForCachedOsrCode(isolate_, function,
osr_urgency_for_cached_osr_code);
}
bool TieringManager::MaybeOSR(JSFunction function, UnoptimizedFrame* frame) {
int ticks = function.feedback_vector().profiler_ticks();
if (function.IsMarkedForOptimization() ||
function.IsMarkedForConcurrentOptimization() ||
function.HasAvailableOptimizedCode()) {
int64_t allowance = kOSRBytecodeSizeAllowanceBase +
ticks * kOSRBytecodeSizeAllowancePerTick;
if (function.shared().GetBytecodeArray(isolate_).length() <= allowance) {
AttemptOnStackReplacement(frame);
const bool is_marked_for_any_optimization =
(static_cast<uint32_t>(opt_marker) & kNoneOrInOptimizationQueueMask) != 0;
if (is_marked_for_any_optimization || function.HasAvailableOptimizedCode()) {
// OSR kicks in only once we've previously decided to tier up, but we are
// still in the unoptimized frame (this implies a long-running loop).
if (SmallEnoughForOSR(isolate_, function)) {
TryIncrementOsrUrgency(isolate_, function);
}
return true;
}
return false;
}
namespace {
// Return unconditionally and don't run through the optimization decision
// again; we've already decided to tier up previously.
return;
}
bool ShouldOptimizeAsSmallFunction(int bytecode_size, bool any_ic_changed) {
return !any_ic_changed &&
bytecode_size < FLAG_max_bytecode_size_for_early_opt;
DCHECK(!is_marked_for_any_optimization &&
!function.HasAvailableOptimizedCode());
OptimizationDecision d = ShouldOptimize(function, code_kind, frame);
if (d.should_optimize()) Optimize(function, code_kind, d);
}
} // namespace
OptimizationDecision TieringManager::ShouldOptimize(JSFunction function,
CodeKind code_kind,
JavaScriptFrame* frame) {
......@@ -277,31 +339,7 @@ OptimizationDecision TieringManager::ShouldOptimize(JSFunction function,
return OptimizationDecision::DoNotOptimize();
}
// If function's SFI has OSR cache, once enter loop range of OSR cache, set
// OSR loop nesting level for matching condition of OSR (loop_depth <
// osr_level), soon later OSR will be triggered when executing bytecode
// JumpLoop which is entry of the OSR cache, then hit the OSR cache.
BytecodeArray bytecode = function.shared().GetBytecodeArray(isolate_);
if (V8_UNLIKELY(function.shared().osr_code_cache_state() > kNotCached) &&
frame->is_unoptimized()) {
int current_offset =
static_cast<UnoptimizedFrame*>(frame)->GetBytecodeOffset();
OSROptimizedCodeCache cache =
function.context().native_context().GetOSROptimizedCodeCache();
std::vector<int> bytecode_offsets =
cache.GetBytecodeOffsetsFromSFI(function.shared());
interpreter::BytecodeArrayIterator iterator(
Handle<BytecodeArray>(bytecode, isolate_));
for (int jump_offset : bytecode_offsets) {
iterator.SetOffset(jump_offset);
int jump_target_offset = iterator.GetJumpTargetOffset();
if (jump_offset >= current_offset &&
current_offset >= jump_target_offset) {
bytecode.set_osr_urgency(iterator.GetImmediateOperand(1) + 1);
return OptimizationDecision::TurbofanHotAndStable();
}
}
}
const int ticks = function.feedback_vector().profiler_ticks();
const int ticks_for_optimization =
FLAG_ticks_before_optimization +
......@@ -324,6 +362,7 @@ OptimizationDecision TieringManager::ShouldOptimize(JSFunction function,
bytecode.length(), FLAG_max_bytecode_size_for_early_opt);
}
}
return OptimizationDecision::DoNotOptimize();
}
......@@ -402,9 +441,9 @@ void TieringManager::OnInterruptTick(Handle<JSFunction> function) {
function_obj.feedback_vector().SaturatingIncrementProfilerTicks();
JavaScriptFrameIterator it(isolate_);
DCHECK(it.frame()->is_unoptimized());
UnoptimizedFrame* frame = UnoptimizedFrame::cast(it.frame());
const CodeKind code_kind = function_obj.GetActiveTier().value();
MaybeOptimizeFrame(function_obj, it.frame(), code_kind);
MaybeOptimizeFrame(function_obj, frame, code_kind);
}
} // namespace internal
......
......@@ -32,8 +32,8 @@ class TieringManager {
void NotifyICChanged() { any_ic_changed_ = true; }
void AttemptOnStackReplacement(UnoptimizedFrame* frame,
int nesting_levels = 1);
// After this request, the next JumpLoop will perform OSR.
void RequestOsrAtNextOpportunity(JSFunction function);
// For use when a JSFunction is available.
static int InterruptBudgetFor(Isolate* isolate, JSFunction function);
......@@ -43,12 +43,10 @@ class TieringManager {
private:
// Make the decision whether to optimize the given function, and mark it for
// optimization if the decision was 'yes'.
void MaybeOptimizeFrame(JSFunction function, JavaScriptFrame* frame,
// This function is also responsible for bumping the OSR urgency.
void MaybeOptimizeFrame(JSFunction function, UnoptimizedFrame* frame,
CodeKind code_kind);
// Potentially attempts OSR from and returns whether no other
// optimization attempts should be made.
bool MaybeOSR(JSFunction function, UnoptimizedFrame* frame);
OptimizationDecision ShouldOptimize(JSFunction function, CodeKind code_kind,
JavaScriptFrame* frame);
void Optimize(JSFunction function, CodeKind code_kind,
......
......@@ -12,15 +12,13 @@
namespace v8 {
namespace internal {
// This enum are states that how many OSR code caches belong to a SFI. Without
// this enum, need to check all OSR code cache entries to know whether a
// JSFunction's SFI has OSR code cache. The enum value kCachedMultiple is for
// doing time-consuming loop check only when the very unlikely state change
// kCachedMultiple -> { kCachedOnce | kCachedMultiple }.
// This enum is a performance optimization for accessing the OSR code cache -
// we can skip cache iteration in many cases unless there are multiple entries
// for a particular SharedFunctionInfo.
enum OSRCodeCacheStateOfSFI : uint8_t {
kNotCached, // Likely state, no OSR code cache
kCachedOnce, // Unlikely state, one OSR code cache
kCachedMultiple, // Very unlikely state, multiple OSR code caches
kNotCached, // Likely state.
kCachedOnce, // Unlikely state, one entry.
kCachedMultiple, // Very unlikely state, multiple entries.
};
class V8_EXPORT OSROptimizedCodeCache : public WeakFixedArray {
......
......@@ -581,10 +581,8 @@ RUNTIME_FUNCTION(Runtime_OptimizeOsr) {
function->MarkForOptimization(isolate, CodeKind::TURBOFAN,
ConcurrencyMode::kNotConcurrent);
// Make the profiler arm all back edges in unoptimized code.
if (it.frame()->is_unoptimized()) {
isolate->tiering_manager()->AttemptOnStackReplacement(
UnoptimizedFrame::cast(it.frame()), BytecodeArray::kMaxOsrUrgency);
isolate->tiering_manager()->RequestOsrAtNextOpportunity(*function);
}
return ReadOnlyRoots(isolate).undefined_value();
......